3 yıl önce · f87f7a7a22
--- a/ARM.CMSIS.pdsc
+++ b/ARM.CMSIS.pdsc
@@ -15,6 +15,7 @@
 
				        - Changed return types of all API's
			
 
				        - Support for int16 average pooling DSP implementation
			
 
				        - Support for DSP extension optimization for int16 depthwise_conv
			
 
				+       - Support for MVEI extension optimization for int16 depthwise_conv
			
 
				        - Support for MVEI extension optimization for int16 max pooling
			
 
				     </release>
			
 
				     <release version="5.9.0" date="2022-05-02">
			
@@ -2297,6 +2298,7 @@ and 8-bit Java bytecodes in Jazelle state.
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c"/>
			
 
				+        <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c"/>
			
 
				         <file category="source" name="CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c"/>
			
--- a/CMSIS/DoxyGen/NN/src/history.txt
+++ b/CMSIS/DoxyGen/NN/src/history.txt
@@ -13,6 +13,7 @@
 
				       <li> Replaced arm_status with arm_cmsis_nn_status struct </li>
			
 
				       <li> Added DSP support in arm_avgpool_s16.c </li>
			
 
				       <li> Added support for DSP extension optimization for int16 depthwise_conv </li>
			
 
				+      <li> Added support for MVEI extension optimization for int16 depthwise_conv </li>
			
 
				       <li> Added support for MVEI extension optimization for int16 max pooling </li>
			
 
				       </ul>
			
 
				     </td>
			
@@ -51,7 +52,7 @@
 
				     <td>V3.0.0</td>
			
 
				     <td>
			
 
				     <ul>
			
 
				-      <li>Updated arm_fully_connected_s8 to use zero weight offset<br> 
			
 
				+      <li>Updated arm_fully_connected_s8 to use zero weight offset<br>
			
 
				       as per the TFLM int8 quantization spec. The API is the same but,<br>
			
 
				       the weight offset parameter is expected to be zero<br> </li>
			
 
				       <li> Added unit test for Softmax </li>
			
--- a/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_nnsupportfunctions.h
			
 
				  * Description:  Public header file of support functions for CMSIS NN Library
			
 
				  *
			
 
				- * $Date:        10 May 2022
			
 
				- * $Revision:    V.8.1.0
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.8.2.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  * -------------------------------------------------------------------- */
			
@@ -557,6 +557,43 @@ q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
 
				                                     const int32_t *const output_bias,
			
 
				                                     q7_t *out);
			
 
				 
			
 
				+/**
			
 
				+ * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
			
 
				+ *        Dimensions are the same for lhs and rhs.
			
 
				+ *
			
 
				+ * @param[in]      lhs             Input left-hand side matrix
			
 
				+ * @param[in]      rhs             Input right-hand side matrix (transposed)
			
 
				+ * @param[in]      num_ch          Number of channels in LHS/RHS
			
 
				+ * @param[in]      out_shift       Per channel output shift. Length of vector is equal to number of channels.
			
 
				+ * @param[in]      out_mult        Per channel output multiplier. Length of vector is equal to number of channels.
			
 
				+ * @param[in]      activation_min  Minimum value to clamp the output to. Range: int8
			
 
				+ * @param[in]      activation_max  Maximum value to clamp the output to. Range: int8
			
 
				+ * @param[in]       row_x_col       (row_dimension * col_dimension) of LHS/RHS matrix
			
 
				+ * @param[in]      output_bias     Per channel output bias. Length of vector is equal to number of channels.
			
 
				+ * @param[in]      out             Output pointer
			
 
				+ *
			
 
				+ * @return         The function returns one of the two
			
 
				+ *                  - Updated output pointer if an implementation is available
			
 
				+ *                  - NULL if no implementation is available.
			
 
				+ *
			
 
				+ * @note           If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read
			
 
				+ * out for the following.
			
 
				+ *                  - Output shift
			
 
				+ *                  - Output multiplier
			
 
				+ *                  - Output bias
			
 
				+ *                  - rhs
			
 
				+ */
			
 
				+int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
			
 
				+                                        const q7_t *rhs,
			
 
				+                                        const uint16_t num_ch,
			
 
				+                                        const int32_t *out_shift,
			
 
				+                                        const int32_t *out_mult,
			
 
				+                                        const int32_t activation_min,
			
 
				+                                        const int32_t activation_max,
			
 
				+                                        const uint16_t row_x_col,
			
 
				+                                        const int64_t *const output_bias,
			
 
				+                                        int16_t *out);
			
 
				+
			
 
				 /**
			
 
				  *@brief Matrix-multiplication function for convolution with reordered columns
			
 
				  *@param[in]       pA          pointer to operand A
			
@@ -662,8 +699,8 @@ __STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t bloc
 
				                    "   vstrb.8                 q0, [%[in]], #16            \n"
			
 
				                    "   letp                    lr, 2b                     \n"
			
 
				                    "1:                                                    \n"
			
 
				-                   : [ in ] "+r"(dst)
			
 
				-                   : [ cnt ] "r"(block_size), [ set_val ] "r"(val)
			
 
				+                   : [in] "+r"(dst)
			
 
				+                   : [cnt] "r"(block_size), [set_val] "r"(val)
			
 
				                    : "q0", "memory", "r14");
			
 
				 #else
			
 
				     memset(dst, val, block_size);
			
@@ -1010,14 +1047,26 @@ __STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, const q7_t *__REST
 
				                    "   vstrb.8                 q0, [%[out]], #16           \n"
			
 
				                    "   letp                    lr, 2b                     \n"
			
 
				                    "1:                                                    \n"
			
 
				-                   : [ in ] "+r"(src), [ out ] "+r"(dst)
			
 
				-                   : [ cnt ] "r"(block_size)
			
 
				+                   : [in] "+r"(src), [out] "+r"(dst)
			
 
				+                   : [cnt] "r"(block_size)
			
 
				                    : "q0", "memory", "r14");
			
 
				 #else
			
 
				     memcpy(dst, src, block_size);
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				+/**
			
 
				+ * @brief           memcpy wrapper for int16
			
 
				+ * @param[in, out]  dst         Destination pointer
			
 
				+ * @param[in]       src         Source pointer.
			
 
				+ * @param[in]       block_size  Number of bytes to copy.
			
 
				+ *
			
 
				+ */
			
 
				+__STATIC_FORCEINLINE void arm_memcpy_q15(q15_t *__RESTRICT dst, const q15_t *__RESTRICT src, uint32_t block_size)
			
 
				+{
			
 
				+    memcpy(dst, src, block_size);
			
 
				+}
			
 
				+
			
 
				 #if defined(ARM_MATH_MVEI)
			
 
				 /**
			
 
				  * @brief           Vector saturating doubling high multiply returning high half.
			
--- a/CMSIS/NN/README.md
+++ b/CMSIS/NN/README.md
@@ -28,17 +28,17 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 
				 ||arm_convolve_wrapper_s8()|CONV| None |n.a.| Yes | Yes |The additional memory required depends on the optimal convolution function called.|
			
 
				 ||arm_convolve_s8()|CONV| None |4 * (ker_x * ker_y * input_ch + delta)| Yes | Yes |delta - MVE only|
			
 
				 ||arm_convolve_1x1_s8_fast() | CONV | dilation = 1 <br/> ker_x = 1, ker_y = 1 <br/> pad = 0<br/> stride = 1<br/> input_ch % 4 = 0| No | Yes |Yes ||
			
 
				-||arm_convolve_1_x_n_s8() | CONV | dilation = 1 <br/> output_y % 4 = 0 | Yes. Refer API for details |Yes |Yes|Not all implementations require additional memory|
			
 
				+||arm_convolve_1_x_n_s8() | CONV | dilation = 1 <br/> output_y % 4 = 0 | Yes. Refer to API for details |Yes |Yes|Not all implementations require additional memory|
			
 
				 ||arm_depthwise_conv_wrapper_s8()| DEPTHWISE_CONV | None |n.a.| Yes| Yes| The additional memory required depends on the optimal convolution function called|
			
 
				 ||arm_depthwise_conv_3x3_s8() | DEPTHWISE_CONV | dilation = 1 <br/> depth_multiplier = 1 <br/> pad_x <= 1 | No|No|No| Preferred function for 3x3 kernel size for DSP extension. </br> For MVE, use arm_depthwise_conv_s8_opt()||
			
 
				 ||arm_depthwise_conv_s8() | DEPTHWISE_CONV | None | No|No|No||
			
 
				 ||arm_depthwise_conv_s8_opt()| DEPTHWISE_CONV | dilation = 1 <br/> depth_multiplier = 1 | DSP: 2 * ker_x * ker_y * input_ch <br/> MVE: 2 * DSP + 4 | Yes| Yes| Best case is when channels are multiple of 4 or <br/>at the least >= 4 |
			
 
				 ||arm_convolve_wrapper_s16()|CONV|None|n.a.| Yes | No | The additional memory required depends on the optimal convolution function called |
			
 
				 ||arm_convolve_s16()|CONV|None|No| No | No ||
			
 
				-||arm_convolve_fast_s16()|CONV|dilation = 1, <br/> ker_x * ker_y * input_ch < 512 <br/> |4 * ker_x * ker_y * input_ch| Yes | No ||
			
 
				-||arm_depthwise_conv_wrapper_s16() | DEPTHWISE_CONV | None | n.a. | Yes | No | The additional memory required depends on the optimal convolution function called |
			
 
				-||arm_depthwise_conv_s16() | DEPTHWISE_CONV | None | No|No|No||
			
 
				-||arm_depthwise_conv_fast_s16() | DEPTHWISE_CONV | Yes | 4 * ker_x * ker_y * input_ch | Yes | No ||
			
 
				+||arm_convolve_fast_s16()|CONV|dilation = 1, <br/> ker_x * ker_y * input_ch < 512 <br/> |4 * ker_x * ker_y * input_ch| Yes | Yes ||
			
 
				+||arm_depthwise_conv_wrapper_s16() | DEPTHWISE_CONV | None | n.a. | Yes | Yes | The additional memory required depends on the optimal convolution function called |
			
 
				+||arm_depthwise_conv_s16() | DEPTHWISE_CONV | None | No | Yes ||
			
 
				+||arm_depthwise_conv_fast_s16() | DEPTHWISE_CONV | Yes | Yes. Refer to API for details | Yes | Yes ||
			
 
				 |[Fully Connected](https://arm-software.github.io/CMSIS_5/NN/html/group__FC.html)||||| |  | |
			
 
				 ||arm_fully_connected_s8() |FULLY CONNECTED & <br/> MAT MUL  | None | No | Yes | Yes | |
			
 
				 ||arm_fully_connected_s16() |FULLY CONNECTED & <br/> MAT MUL  | None | No | Yes | No | |
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_fast_s16.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_fast_s16.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2022 Arm Limited or its affiliates.
			
 
				+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -22,8 +22,8 @@
 
				  * Description:  Optimized s16 depthwise separable convolution function for
			
 
				  *               channel multiplier of 1.
			
 
				  *
			
 
				- * $Date:        May 19, 2022
			
 
				- * $Revision:    V.1.0.0
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.1.1.0
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  *
			
@@ -69,7 +69,7 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				         return ARM_CMSIS_NN_ARG_ERROR;
			
 
				     }
			
 
				 
			
 
				-    if (filter_dims->w * filter_dims->h * input_ch >= 512)
			
 
				+    if (filter_dims->w * filter_dims->h >= 512)
			
 
				     {
			
 
				         return ARM_CMSIS_NN_ARG_ERROR;
			
 
				     }
			
@@ -78,10 +78,12 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				     {
			
 
				         return ARM_CMSIS_NN_ARG_ERROR;
			
 
				     }
			
 
				-#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
			
 
				-    const int32_t input_batches = input_dims->n;
			
 
				+
			
 
				+#if defined(ARM_MATH_DSP)
			
 
				+    (void)bias_dims;
			
 
				     const int32_t input_x = input_dims->w;
			
 
				     const int32_t input_y = input_dims->h;
			
 
				+    const int32_t input_batches = input_dims->n;
			
 
				     const int32_t kernel_x = filter_dims->w;
			
 
				     const int32_t kernel_y = filter_dims->h;
			
 
				     const int32_t pad_x = dw_conv_params->padding.w;
			
@@ -96,7 +98,124 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				     const int32_t output_activation_max = dw_conv_params->activation.max;
			
 
				     q15_t *buffer_a = (q15_t *)ctx->buf;
			
 
				 
			
 
				-    (void)bias_dims;
			
 
				+#if defined(ARM_MATH_MVEI)
			
 
				+    int16_t *lhs_buffer = buffer_a;
			
 
				+    int16_t *out = output;
			
 
				+    int buffer_count = 0;
			
 
				+    const int32_t kernel_size = kernel_x * kernel_y;
			
 
				+
			
 
				+    for (int i_batch = 0; i_batch < input_batches; i_batch++)
			
 
				+    {
			
 
				+        /* This part implements the im2col function */
			
 
				+        for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
			
 
				+        {
			
 
				+            for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
			
 
				+            {
			
 
				+                for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++)
			
 
				+                {
			
 
				+                    for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++)
			
 
				+                    {
			
 
				+                        if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
			
 
				+                        {
			
 
				+                            memset(lhs_buffer, (int16_t)0, (uint32_t)(input_ch * sizeof(int16_t)));
			
 
				+                        }
			
 
				+                        else
			
 
				+                        {
			
 
				+                            arm_memcpy_q15(lhs_buffer,
			
 
				+                                           (int16_t *)(input + (i_ker_y * input_x + i_ker_x) * input_ch),
			
 
				+                                           (uint32_t)(input_ch * sizeof(int16_t)));
			
 
				+                        }
			
 
				+                        lhs_buffer += input_ch;
			
 
				+                    }
			
 
				+                }
			
 
				+                buffer_count++;
			
 
				+                if (buffer_count == 4)
			
 
				+                {
			
 
				+                    lhs_buffer = buffer_a;
			
 
				+
			
 
				+                    out = arm_nn_depthwise_conv_nt_t_s16(lhs_buffer,
			
 
				+                                                         kernel,
			
 
				+                                                         input_ch,
			
 
				+                                                         output_shift,
			
 
				+                                                         output_mult,
			
 
				+                                                         output_activation_min,
			
 
				+                                                         output_activation_max,
			
 
				+                                                         kernel_size,
			
 
				+                                                         bias,
			
 
				+                                                         out);
			
 
				+                    buffer_count = 0;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        input += input_x * input_y * input_ch;
			
 
				+    }
			
 
				+
			
 
				+    /* Handle left over buffers */
			
 
				+    lhs_buffer = buffer_a;
			
 
				+    for (int i_buf = 0; i_buf < buffer_count; i_buf++)
			
 
				+    {
			
 
				+        int32_t loop_count = (input_ch + 3) / 4;
			
 
				+        int32_t num_ch_to_process = input_ch;
			
 
				+
			
 
				+        for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
			
 
				+        {
			
 
				+            const int8_t *row_0 = kernel + offset;
			
 
				+            const int16_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset;
			
 
				+
			
 
				+            int32x4_t out_0 = vdupq_n_s32(0);
			
 
				+
			
 
				+            for (int i_ker = 0; i_ker < kernel_size; i_ker++)
			
 
				+            {
			
 
				+                const int32x4_t ker_0 = vldrbq_s32(row_0);
			
 
				+
			
 
				+                int32x4_t ip_0 = vldrhq_s32(col_0);
			
 
				+                out_0 += vmulq_s32(ip_0, ker_0);
			
 
				+
			
 
				+                col_0 += input_ch;
			
 
				+                row_0 += input_ch;
			
 
				+            }
			
 
				+
			
 
				+            int64_t in_requantize_0 = (int64_t)out_0[0];
			
 
				+            int64_t in_requantize_1 = (int64_t)out_0[1];
			
 
				+            int64_t in_requantize_2 = (int64_t)out_0[2];
			
 
				+            int64_t in_requantize_3 = (int64_t)out_0[3];
			
 
				+
			
 
				+            if (bias)
			
 
				+            {
			
 
				+                in_requantize_0 += bias[offset];
			
 
				+                in_requantize_1 += bias[offset + 1];
			
 
				+                in_requantize_2 += bias[offset + 2];
			
 
				+                in_requantize_3 += bias[offset + 3];
			
 
				+            }
			
 
				+
			
 
				+            int32_t reduced_multiplier_0 = REDUCE_MULTIPLIER(output_mult[offset]);
			
 
				+            int32_t reduced_multiplier_1 = REDUCE_MULTIPLIER(output_mult[offset + 1]);
			
 
				+            int32_t reduced_multiplier_2 = REDUCE_MULTIPLIER(output_mult[offset + 2]);
			
 
				+            int32_t reduced_multiplier_3 = REDUCE_MULTIPLIER(output_mult[offset + 3]);
			
 
				+
			
 
				+            out_0[0] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier_0, output_shift[offset]);
			
 
				+            out_0[1] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier_1, output_shift[offset + 1]);
			
 
				+            out_0[2] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier_2, output_shift[offset + 2]);
			
 
				+            out_0[3] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier_3, output_shift[offset + 3]);
			
 
				+
			
 
				+            out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
			
 
				+            out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
			
 
				+
			
 
				+            mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process);
			
 
				+            vstrhq_p_s32(out, out_0, p);
			
 
				+
			
 
				+            out += 4;
			
 
				+        }
			
 
				+
			
 
				+        const int tail_ch = input_ch & 0x3;
			
 
				+        if (tail_ch != 0)
			
 
				+        {
			
 
				+            out -= (4 - tail_ch);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+#else // ARM_MATH_DSP
			
 
				+
			
 
				     /* Run the following code in cores using DSP extension */
			
 
				     q15_t *const col_buffer_start = buffer_a;
			
 
				     q15_t *col_buffer = col_buffer_start;
			
@@ -143,9 +262,9 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				                         }
			
 
				                         else
			
 
				                         {
			
 
				-                            memcpy(&col_buffer[index],
			
 
				-                                   input + (idx_y * input_x + idx_x) * input_ch,
			
 
				-                                   input_ch * sizeof(q15_t));
			
 
				+                            arm_memcpy_q15(&col_buffer[index],
			
 
				+                                           input + (idx_y * input_x + idx_x) * input_ch,
			
 
				+                                           input_ch * sizeof(q15_t));
			
 
				                         }
			
 
				                         index += input_ch;
			
 
				                     }
			
@@ -237,10 +356,18 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				                         col_count--;
			
 
				                     }
			
 
				 
			
 
				-                    q63_t acc_1 = *bias++ + sum_1;
			
 
				-                    q63_t acc_2 = *bias++ + sum_2;
			
 
				-                    q63_t acc_3 = *bias++ + sum_3;
			
 
				-                    q63_t acc_4 = *bias++ + sum_4;
			
 
				+                    int64_t acc_1 = sum_1;
			
 
				+                    int64_t acc_2 = sum_2;
			
 
				+                    int64_t acc_3 = sum_3;
			
 
				+                    int64_t acc_4 = sum_4;
			
 
				+
			
 
				+                    if (bias)
			
 
				+                    {
			
 
				+                        acc_1 += *bias++;
			
 
				+                        acc_2 += *bias++;
			
 
				+                        acc_3 += *bias++;
			
 
				+                        acc_4 += *bias++;
			
 
				+                    }
			
 
				 
			
 
				                     result = arm_nn_requantize_s64(acc_1, output_mult_1, *output_shift++);
			
 
				                     result = MAX(result, output_activation_min);
			
@@ -278,7 +405,11 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				                     {
			
 
				                         sum += row_pos[i * input_ch] * col_pos[i * input_ch];
			
 
				                     }
			
 
				-                    q63_t acc = *bias++ + sum;
			
 
				+                    int64_t acc = sum;
			
 
				+                    if (bias)
			
 
				+                    {
			
 
				+                        acc += *bias++;
			
 
				+                    }
			
 
				                     result = arm_nn_requantize_s64(acc, REDUCE_MULTIPLIER(*output_mult), *output_shift++);
			
 
				                     output_mult++;
			
 
				                     result = MAX(result, output_activation_min);
			
@@ -287,7 +418,6 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				 
			
 
				                     row_count--;
			
 
				                 }
			
 
				-
			
 
				                 // clear counter and pointers
			
 
				                 col_buffer = col_buffer_start;
			
 
				             }
			
@@ -296,6 +426,7 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				         /* Advance to the next batch */
			
 
				         input += (input_x * input_y * input_ch);
			
 
				     }
			
 
				+#endif
			
 
				 #else
			
 
				     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
			
 
				     return arm_depthwise_conv_s16(ctx,
			
@@ -317,8 +448,13 @@ arm_cmsis_nn_status arm_depthwise_conv_fast_s16(const cmsis_nn_context *ctx,
 
				 
			
 
				 int32_t arm_depthwise_conv_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
			
 
				 {
			
 
				-#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
			
 
				-    return ((input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t));
			
 
				+#if defined(ARM_MATH_DSP)
			
 
				+#if defined(ARM_MATH_MVEI)
			
 
				+    /* The + 8 accounts for a worst case out of bounds read of the lhs buffers in the *_nt_t_* function.  */
			
 
				+    return 4 * input_dims->c * filter_dims->w * filter_dims->h * sizeof(int16_t) + 8;
			
 
				+#else // ARM_MATH_DSP
			
 
				+    return input_dims->c * filter_dims->w * filter_dims->h * sizeof(int16_t);
			
 
				+#endif
			
 
				 #else
			
 
				     (void)input_dims;
			
 
				     (void)filter_dims;
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -21,8 +21,8 @@
 
				  * Title:        arm_depthwise_conv_s8.c
			
 
				  * Description:  s8 version of depthwise convolution.
			
 
				  *
			
 
				- * $Date:        9. May 2022
			
 
				- * $Revision:    V.3.0.1
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.3.0.2
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  *
			
@@ -281,7 +281,6 @@ arm_cmsis_nn_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
 
				     const uint16_t dilation_x = dw_conv_params->dilation.w;
			
 
				     const uint16_t dilation_y = dw_conv_params->dilation.h;
			
 
				 
			
 
				-    (void)dw_conv_params->dilation;
			
 
				     (void)bias_dims;
			
 
				     (void)ctx;
			
 
				 
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -22,8 +22,8 @@
 
				  * Description:  Optimized s8 depthwise separable convolution function for
			
 
				  *               channel multiplier of 1.
			
 
				  *
			
 
				- * $Date:        19 April 2022
			
 
				- * $Revision:    V.3.0.0
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.3.0.1
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  *
			
@@ -75,6 +75,7 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 
				         return ARM_CMSIS_NN_ARG_ERROR;
			
 
				     }
			
 
				 #ifdef ARM_MATH_DSP
			
 
				+    (void)bias_dims;
			
 
				     const int32_t input_x = input_dims->w;
			
 
				     const int32_t input_y = input_dims->h;
			
 
				     const int32_t kernel_x = filter_dims->w;
			
@@ -94,7 +95,6 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 
				     q15_t *buffer_a = (q15_t *)ctx->buf;
			
 
				 
			
 
				 #ifdef ARM_MATH_MVEI
			
 
				-    (void)bias_dims;
			
 
				     /* Generate two columns from the input tensor */
			
 
				     q7_t *lhs_buffer = (q7_t *)buffer_a;
			
 
				     q7_t *out = output;
			
@@ -170,8 +170,8 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 
				     for (int i_buf = 0; i_buf < buffer_count; i_buf++)
			
 
				     {
			
 
				         int32_t loop_count = (input_ch + 3) / 4;
			
 
				-
			
 
				         int32_t num_ch_to_process = input_ch;
			
 
				+
			
 
				         for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
			
 
				         {
			
 
				             const int8_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset;
			
@@ -211,7 +211,6 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
 
				     }
			
 
				 
			
 
				 #else // ARM_MATH_DSP
			
 
				-    (void)bias_dims;
			
 
				     /* Run the following code in cores using DSP extension */
			
 
				     q15_t *const col_buffer_start = buffer_a;
			
 
				     q15_t *col_buffer = col_buffer_start;
			
@@ -418,7 +417,7 @@ int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dim
 
				 {
			
 
				 #if defined(ARM_MATH_MVEI)
			
 
				     /* The + 4 accounts for out of bounds read of the lhs buffers in the *_nt_t_* functions.  */
			
 
				-    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t) + 4;
			
 
				+    return (4 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int8_t) + 4;
			
 
				 #elif defined(ARM_MATH_DSP)
			
 
				     return (input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t);
			
 
				 #else
			
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s16.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s16.c
@@ -1,5 +1,5 @@
 
				 /*
			
 
				- * Copyright (C) 2010-2022 Arm Limited or its affiliates.
			
 
				+ * SPDX-FileCopyrightText: Copyright 2010-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				  *
			
 
				  * SPDX-License-Identifier: Apache-2.0
			
 
				  *
			
@@ -22,8 +22,8 @@
 
				  * Description:  Wrapper API to select appropriate depthwise conv API based
			
 
				  *               on dimensions.
			
 
				  *
			
 
				- * $Date:        19. May 2022
			
 
				- * $Revision:    V.1.0.0
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.1.0.1
			
 
				  *
			
 
				  * Target Processor:  Cortex-M CPUs
			
 
				  *
			
@@ -40,6 +40,10 @@
 
				  * @{
			
 
				  */
			
 
				 
			
 
				+#define USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims)                                             \
			
 
				+    (dw_conv_params->ch_mult == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1 &&             \
			
 
				+     filter_dims->w * filter_dims->h * input_dims->c < 512)
			
 
				+
			
 
				 /*
			
 
				  *  s16 Depthwise conv wrapper function
			
 
				  *
			
@@ -60,8 +64,7 @@ arm_cmsis_nn_status arm_depthwise_conv_wrapper_s16(const cmsis_nn_context *ctx,
 
				 {
			
 
				     arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS;
			
 
				 
			
 
				-    if (dw_conv_params->ch_mult == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1 &&
			
 
				-        filter_dims->w * filter_dims->h * input_dims->c < 512)
			
 
				+    if (USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims))
			
 
				     {
			
 
				         status = arm_depthwise_conv_fast_s16(ctx,
			
 
				                                              dw_conv_params,
			
@@ -105,8 +108,7 @@ int32_t arm_depthwise_conv_wrapper_s16_get_buffer_size(const cmsis_nn_dw_conv_pa
 
				     (void)output_dims;
			
 
				     int32_t size = 0;
			
 
				 
			
 
				-    if (dw_conv_params->ch_mult == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1 &&
			
 
				-        filter_dims->w * filter_dims->h * input_dims->c < 512)
			
 
				+    if (USE_FAST_DW_CONV_FUNCTION(dw_conv_params, filter_dims, input_dims))
			
 
				     {
			
 
				         size = arm_depthwise_conv_fast_s16_get_buffer_size(input_dims, filter_dims);
			
 
				     }
			
--- a/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt
+++ b/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt
@@ -1,5 +1,5 @@
 
				 #
			
 
				-# Copyright (c) 2019-2022 Arm Limited.
			
 
				+# SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				 #
			
 
				 # SPDX-License-Identifier: Apache-2.0
			
 
				 #
			
@@ -22,5 +22,6 @@ target_sources(cmsis-nn PRIVATE ${SRC} arm_q7_to_q15_with_offset.c
 
				                                        arm_q7_to_q15_with_offset.c
			
 
				                                        arm_nn_mat_mul_kernel_s16.c
			
 
				                                        arm_nn_vec_mat_mult_t_s16.c
			
 
				-                                       arm_q7_to_q15_no_shift.c)
			
 
				+                                       arm_q7_to_q15_no_shift.c
			
 
				+                                       arm_nn_depthwise_conv_nt_t_s16.c)
			
 
				 
			
--- a/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c
+++ b/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c
@@ -0,0 +1,171 @@
 
				+/*
			
 
				+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_nn_depthwise_conv_nt_t_s16.c
			
 
				+ * Description:  Depthwise convolution on matrices with no padding.
			
 
				+ *
			
 
				+ * $Date:        6 July 2022
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M processors with MVE extension
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnsupportfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ * @ingroup groupSupport
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup NNBasicMath
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * Depthwise convolution of rhs matrix with 4 lhs matrices with no padding. Dimensions are the same for lhs and rhs.
			
 
				+ *
			
 
				+ * Refer header file for details.
			
 
				+ *
			
 
				+ */
			
 
				+int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
			
 
				+                                        const q7_t *rhs,
			
 
				+                                        const uint16_t num_ch,
			
 
				+                                        const int32_t *out_shift,
			
 
				+                                        const int32_t *out_mult,
			
 
				+                                        const int32_t activation_min,
			
 
				+                                        const int32_t activation_max,
			
 
				+                                        const uint16_t row_x_col,
			
 
				+                                        const int64_t *const output_bias,
			
 
				+                                        int16_t *out)
			
 
				+{
			
 
				+#if defined(ARM_MATH_MVEI)
			
 
				+
			
 
				+    const int64_t *bias = output_bias;
			
 
				+    int32_t loop_count = (num_ch + 3) / 4;
			
 
				+    uint32_t num_ch_to_process = num_ch;
			
 
				+
			
 
				+    for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count;
			
 
				+         num_ch_to_process -= 4, offset += 4, out += 4, i_loop_cnt++)
			
 
				+    {
			
 
				+        const int8_t *rhs_0 = rhs + offset;
			
 
				+        const int16_t *lhs_0 = lhs + offset;
			
 
				+        const int16_t *lhs_1 = lhs + row_x_col * num_ch + offset;
			
 
				+        const int16_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset;
			
 
				+        const int16_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset;
			
 
				+
			
 
				+        int32x4_t out_0 = vdupq_n_s32(0);
			
 
				+        int32x4_t out_1 = vdupq_n_s32(0);
			
 
				+        int32x4_t out_2 = vdupq_n_s32(0);
			
 
				+        int32x4_t out_3 = vdupq_n_s32(0);
			
 
				+
			
 
				+        for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++)
			
 
				+        {
			
 
				+            const int32x4_t ker_0 = vldrbq_s32(rhs_0);
			
 
				+
			
 
				+            int32x4_t ip_0 = vldrhq_s32(lhs_0);
			
 
				+            out_0 += vmulq_s32(ip_0, ker_0);
			
 
				+
			
 
				+            int32x4_t ip_1 = vldrhq_s32(lhs_1);
			
 
				+            out_1 += vmulq_s32(ip_1, ker_0);
			
 
				+
			
 
				+            int32x4_t ip_2 = vldrhq_s32(lhs_2);
			
 
				+            out_2 += vmulq_s32(ip_2, ker_0);
			
 
				+
			
 
				+            int32x4_t ip_3 = vldrhq_s32(lhs_3);
			
 
				+            out_3 += vmulq_s32(ip_3, ker_0);
			
 
				+
			
 
				+            lhs_0 += num_ch;
			
 
				+            lhs_1 += num_ch;
			
 
				+            lhs_2 += num_ch;
			
 
				+            lhs_3 += num_ch;
			
 
				+
			
 
				+            rhs_0 += num_ch;
			
 
				+        }
			
 
				+
			
 
				+        for (int i_requantize = 0; i_requantize < 4; i_requantize++)
			
 
				+        {
			
 
				+            int32_t reduced_multiplier = REDUCE_MULTIPLIER(out_mult[i_requantize]);
			
 
				+            int32_t shift = out_shift[i_requantize];
			
 
				+            int64_t in_requantize_0 = (int64_t)out_0[i_requantize];
			
 
				+            int64_t in_requantize_1 = (int64_t)out_1[i_requantize];
			
 
				+            int64_t in_requantize_2 = (int64_t)out_2[i_requantize];
			
 
				+            int64_t in_requantize_3 = (int64_t)out_3[i_requantize];
			
 
				+
			
 
				+            if (bias)
			
 
				+            {
			
 
				+                in_requantize_0 += *bias;
			
 
				+                in_requantize_1 += *bias;
			
 
				+                in_requantize_2 += *bias;
			
 
				+                in_requantize_3 += *bias;
			
 
				+                bias++;
			
 
				+            }
			
 
				+
			
 
				+            out_0[i_requantize] = arm_nn_requantize_s64(in_requantize_0, reduced_multiplier, shift);
			
 
				+            out_1[i_requantize] = arm_nn_requantize_s64(in_requantize_1, reduced_multiplier, shift);
			
 
				+            out_2[i_requantize] = arm_nn_requantize_s64(in_requantize_2, reduced_multiplier, shift);
			
 
				+            out_3[i_requantize] = arm_nn_requantize_s64(in_requantize_3, reduced_multiplier, shift);
			
 
				+        }
			
 
				+
			
 
				+        mve_pred16_t p = vctp32q(num_ch_to_process);
			
 
				+
			
 
				+        out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
			
 
				+        out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
			
 
				+        vstrhq_p_s32(out, out_0, p);
			
 
				+
			
 
				+        out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
			
 
				+        out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
			
 
				+        vstrhq_p_s32(out + num_ch, out_1, p);
			
 
				+
			
 
				+        out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min));
			
 
				+        out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max));
			
 
				+        vstrhq_p_s32(out + 2 * num_ch, out_2, p);
			
 
				+
			
 
				+        out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min));
			
 
				+        out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max));
			
 
				+        vstrhq_p_s32(out + 3 * num_ch, out_3, p);
			
 
				+
			
 
				+        out_mult += 4;
			
 
				+        out_shift += 4;
			
 
				+    }
			
 
				+    const int tail_ch = num_ch & 0x3;
			
 
				+    if (tail_ch != 0)
			
 
				+    {
			
 
				+        out -= (4 - tail_ch);
			
 
				+    }
			
 
				+
			
 
				+    return out + (3 * num_ch);
			
 
				+#else
			
 
				+    (void)lhs;
			
 
				+    (void)rhs;
			
 
				+    (void)num_ch;
			
 
				+    (void)out_shift;
			
 
				+    (void)out_mult;
			
 
				+    (void)activation_min;
			
 
				+    (void)activation_max;
			
 
				+    (void)row_x_col;
			
 
				+    (void)output_bias;
			
 
				+    (void)out;
			
 
				+    return NULL;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of NNBasicMath group
			
 
				+ */
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/bias.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/bias.txt
@@ -0,0 +1,2 @@
 
				+# 8
			
 
				+-2.210000000000000000e+04,2.788100000000000000e+04,5.096000000000000000e+03,1.022600000000000000e+04,-5.822000000000000000e+03,2.018300000000000000e+04,1.650100000000000000e+04,3.081800000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/input.txt
@@ -0,0 +1,76 @@
 
				+# 3,5,5,8
			
 
				+4.514000000000000000e+03,-6.264000000000000000e+03,2.243700000000000000e+04,-3.008100000000000000e+04,-3.273100000000000000e+04,2.333300000000000000e+04,-6.303000000000000000e+03,-2.871800000000000000e+04
			
 
				+-2.343600000000000000e+04,-1.999900000000000000e+04,-1.245500000000000000e+04,-3.187500000000000000e+04,-9.662000000000000000e+03,2.847600000000000000e+04,2.018200000000000000e+04,1.410500000000000000e+04
			
 
				+-1.251500000000000000e+04,2.549100000000000000e+04,1.778100000000000000e+04,-1.518500000000000000e+04,-1.918700000000000000e+04,-1.176000000000000000e+04,-1.262300000000000000e+04,-9.167000000000000000e+03
			
 
				+4.279000000000000000e+03,3.262800000000000000e+04,-2.558000000000000000e+04,6.303000000000000000e+03,-2.620900000000000000e+04,-2.894600000000000000e+04,2.036000000000000000e+03,8.196000000000000000e+03
			
 
				+1.744400000000000000e+04,4.500000000000000000e+03,3.847000000000000000e+03,-1.603900000000000000e+04,2.864200000000000000e+04,-8.731000000000000000e+03,2.214700000000000000e+04,-7.940000000000000000e+02
			
 
				+1.888300000000000000e+04,1.088900000000000000e+04,1.200000000000000000e+01,-6.815000000000000000e+03,-8.290000000000000000e+02,-1.721700000000000000e+04,8.164000000000000000e+03,1.928300000000000000e+04
			
 
				+-2.161300000000000000e+04,1.753800000000000000e+04,2.129300000000000000e+04,-5.966000000000000000e+03,-2.775300000000000000e+04,9.093000000000000000e+03,1.348700000000000000e+04,-4.789000000000000000e+03
			
 
				+6.998000000000000000e+03,2.703700000000000000e+04,1.715200000000000000e+04,-2.432600000000000000e+04,1.038900000000000000e+04,-2.371600000000000000e+04,2.167200000000000000e+04,-1.288400000000000000e+04
			
 
				+-2.688400000000000000e+04,1.299200000000000000e+04,-1.624500000000000000e+04,1.101400000000000000e+04,2.925000000000000000e+04,2.592200000000000000e+04,3.946000000000000000e+03,-2.434300000000000000e+04
			
 
				+1.905200000000000000e+04,1.215300000000000000e+04,-2.502200000000000000e+04,4.532000000000000000e+03,1.952300000000000000e+04,1.816600000000000000e+04,-5.593000000000000000e+03,-6.730000000000000000e+03
			
 
				+-2.578800000000000000e+04,1.277100000000000000e+04,7.080000000000000000e+02,-2.009900000000000000e+04,2.264500000000000000e+04,1.158800000000000000e+04,-1.649000000000000000e+04,-7.065000000000000000e+03
			
 
				+-1.201500000000000000e+04,1.563400000000000000e+04,3.052600000000000000e+04,2.827700000000000000e+04,2.475200000000000000e+04,2.716800000000000000e+04,1.192100000000000000e+04,1.429800000000000000e+04
			
 
				+3.233800000000000000e+04,3.118500000000000000e+04,-2.666700000000000000e+04,-2.105600000000000000e+04,-3.028200000000000000e+04,-6.235000000000000000e+03,-1.259000000000000000e+04,2.642000000000000000e+03
			
 
				+-1.683300000000000000e+04,4.775000000000000000e+03,-1.608000000000000000e+04,1.870800000000000000e+04,3.449000000000000000e+03,1.212600000000000000e+04,2.694100000000000000e+04,2.913100000000000000e+04
			
 
				+2.495000000000000000e+04,-1.299100000000000000e+04,1.392700000000000000e+04,-3.011200000000000000e+04,-2.297600000000000000e+04,1.987200000000000000e+04,1.563300000000000000e+04,1.548200000000000000e+04
			
 
				+8.058000000000000000e+03,-1.158200000000000000e+04,1.227000000000000000e+03,2.826200000000000000e+04,2.193500000000000000e+04,9.733000000000000000e+03,-2.871300000000000000e+04,4.935000000000000000e+03
			
 
				+-2.576200000000000000e+04,-2.397300000000000000e+04,1.320800000000000000e+04,-7.388000000000000000e+03,1.359800000000000000e+04,1.701300000000000000e+04,1.955000000000000000e+04,2.713100000000000000e+04
			
 
				+-1.161900000000000000e+04,8.183000000000000000e+03,3.195900000000000000e+04,-1.848100000000000000e+04,-1.434300000000000000e+04,3.028800000000000000e+04,-2.797400000000000000e+04,-1.886000000000000000e+03
			
 
				+1.253300000000000000e+04,1.497500000000000000e+04,6.799000000000000000e+03,3.206300000000000000e+04,2.312000000000000000e+04,1.855300000000000000e+04,6.933000000000000000e+03,-1.200000000000000000e+04
			
 
				+1.642900000000000000e+04,-2.818100000000000000e+04,1.113100000000000000e+04,7.045000000000000000e+03,8.840000000000000000e+03,-1.051100000000000000e+04,-2.140400000000000000e+04,3.026200000000000000e+04
			
 
				+1.024100000000000000e+04,-2.046500000000000000e+04,-1.597600000000000000e+04,-4.000000000000000000e+00,-2.297300000000000000e+04,-9.300000000000000000e+02,3.203800000000000000e+04,1.851000000000000000e+03
			
 
				+-3.240600000000000000e+04,6.362000000000000000e+03,-1.218500000000000000e+04,-2.622700000000000000e+04,3.135300000000000000e+04,-1.769800000000000000e+04,2.027300000000000000e+04,1.817200000000000000e+04
			
 
				+-1.461800000000000000e+04,-9.049000000000000000e+03,2.663800000000000000e+04,3.172200000000000000e+04,4.500000000000000000e+01,-8.207000000000000000e+03,-7.542000000000000000e+03,3.025600000000000000e+04
			
 
				+3.977000000000000000e+03,-3.803000000000000000e+03,-6.135000000000000000e+03,-6.511000000000000000e+03,-7.829000000000000000e+03,4.396000000000000000e+03,-2.836100000000000000e+04,2.324900000000000000e+04
			
 
				+2.127000000000000000e+03,-1.956900000000000000e+04,1.510700000000000000e+04,-2.167900000000000000e+04,3.223400000000000000e+04,8.862000000000000000e+03,1.517200000000000000e+04,2.059700000000000000e+04
			
 
				+2.222000000000000000e+03,3.178100000000000000e+04,-1.672400000000000000e+04,-1.302300000000000000e+04,8.696000000000000000e+03,5.212000000000000000e+03,-3.036500000000000000e+04,7.750000000000000000e+03
			
 
				+7.714000000000000000e+03,-4.491000000000000000e+03,1.646400000000000000e+04,2.101400000000000000e+04,-2.145800000000000000e+04,-3.191000000000000000e+04,7.383000000000000000e+03,-3.010600000000000000e+04
			
 
				+2.700700000000000000e+04,3.540000000000000000e+02,-2.871900000000000000e+04,-2.181300000000000000e+04,2.960000000000000000e+03,2.215900000000000000e+04,1.061900000000000000e+04,3.099000000000000000e+04
			
 
				+-2.177200000000000000e+04,-1.718300000000000000e+04,-2.527200000000000000e+04,3.563000000000000000e+03,4.941000000000000000e+03,1.212300000000000000e+04,2.685000000000000000e+03,-2.343200000000000000e+04
			
 
				+-2.764700000000000000e+04,2.378000000000000000e+04,1.741900000000000000e+04,-2.472300000000000000e+04,-2.712100000000000000e+04,-4.803000000000000000e+03,-4.830000000000000000e+02,1.208000000000000000e+03
			
 
				+-1.347500000000000000e+04,-1.725100000000000000e+04,1.942000000000000000e+04,-7.826000000000000000e+03,1.706600000000000000e+04,7.540000000000000000e+03,2.244800000000000000e+04,2.667000000000000000e+03
			
 
				+-1.480700000000000000e+04,8.150000000000000000e+03,-2.688400000000000000e+04,-7.742000000000000000e+03,1.174700000000000000e+04,-3.039800000000000000e+04,-1.337000000000000000e+04,1.865300000000000000e+04
			
 
				+2.316700000000000000e+04,3.057800000000000000e+04,1.084400000000000000e+04,2.910100000000000000e+04,9.598000000000000000e+03,3.146200000000000000e+04,-5.056000000000000000e+03,2.403800000000000000e+04
			
 
				+-8.900000000000000000e+03,-3.215800000000000000e+04,-2.467900000000000000e+04,-2.366400000000000000e+04,7.600000000000000000e+03,-1.148400000000000000e+04,1.045400000000000000e+04,2.372400000000000000e+04
			
 
				+1.777500000000000000e+04,2.612000000000000000e+03,-1.303100000000000000e+04,-5.835000000000000000e+03,2.091000000000000000e+04,-3.078900000000000000e+04,-3.334000000000000000e+03,5.527000000000000000e+03
			
 
				+-1.682600000000000000e+04,-2.339400000000000000e+04,-2.831000000000000000e+03,-3.130400000000000000e+04,1.784100000000000000e+04,8.784000000000000000e+03,4.300000000000000000e+03,-2.415500000000000000e+04
			
 
				+2.094000000000000000e+03,3.747000000000000000e+03,2.542000000000000000e+04,1.697700000000000000e+04,2.077900000000000000e+04,1.567000000000000000e+04,2.185500000000000000e+04,-2.615900000000000000e+04
			
 
				+-2.394100000000000000e+04,-2.778400000000000000e+04,-2.540000000000000000e+02,-5.185000000000000000e+03,-9.459000000000000000e+03,-2.573000000000000000e+04,1.634400000000000000e+04,1.041800000000000000e+04
			
 
				+-2.287500000000000000e+04,1.527300000000000000e+04,-3.263000000000000000e+04,-1.748300000000000000e+04,-2.253800000000000000e+04,-2.142200000000000000e+04,-8.329000000000000000e+03,1.048800000000000000e+04
			
 
				+1.116500000000000000e+04,-2.564000000000000000e+03,5.247000000000000000e+03,2.881400000000000000e+04,3.213000000000000000e+03,-2.369200000000000000e+04,-3.040700000000000000e+04,1.849000000000000000e+03
			
 
				+-1.858800000000000000e+04,-2.219800000000000000e+04,-7.200000000000000000e+03,-6.210000000000000000e+03,-1.432900000000000000e+04,1.373700000000000000e+04,-3.413000000000000000e+03,-2.495500000000000000e+04
			
 
				+1.435200000000000000e+04,-1.234900000000000000e+04,-3.199000000000000000e+03,2.314600000000000000e+04,-2.587000000000000000e+04,2.794700000000000000e+04,1.449800000000000000e+04,1.671000000000000000e+03
			
 
				+-1.506300000000000000e+04,1.331000000000000000e+03,-7.660000000000000000e+03,1.660000000000000000e+03,-1.490700000000000000e+04,-1.759400000000000000e+04,2.878100000000000000e+04,1.917000000000000000e+04
			
 
				+-2.697200000000000000e+04,1.394100000000000000e+04,2.788800000000000000e+04,-2.080700000000000000e+04,8.100000000000000000e+03,2.066300000000000000e+04,4.881000000000000000e+03,-4.664000000000000000e+03
			
 
				+2.464800000000000000e+04,-1.534800000000000000e+04,2.698800000000000000e+04,3.764000000000000000e+03,7.064000000000000000e+03,-1.969800000000000000e+04,2.866200000000000000e+04,4.967000000000000000e+03
			
 
				+-3.144700000000000000e+04,-3.990000000000000000e+03,1.319000000000000000e+03,1.021600000000000000e+04,-2.404000000000000000e+03,5.452000000000000000e+03,-6.171000000000000000e+03,1.610500000000000000e+04
			
 
				+2.878400000000000000e+04,-2.354000000000000000e+04,-6.038000000000000000e+03,-2.026700000000000000e+04,6.141000000000000000e+03,2.268800000000000000e+04,-1.863700000000000000e+04,1.649700000000000000e+04
			
 
				+-2.558900000000000000e+04,-2.687700000000000000e+04,8.084000000000000000e+03,2.963000000000000000e+03,1.295600000000000000e+04,-3.018000000000000000e+03,-1.346300000000000000e+04,-2.595900000000000000e+04
			
 
				+7.863000000000000000e+03,-3.155300000000000000e+04,7.655000000000000000e+03,-2.254400000000000000e+04,-2.389100000000000000e+04,-2.207600000000000000e+04,-5.237000000000000000e+03,3.099400000000000000e+04
			
 
				+2.414200000000000000e+04,2.141400000000000000e+04,-2.080900000000000000e+04,-1.792300000000000000e+04,-9.170000000000000000e+02,-2.898700000000000000e+04,-6.960000000000000000e+03,-1.285000000000000000e+03
			
 
				+1.408800000000000000e+04,1.873100000000000000e+04,6.776000000000000000e+03,-5.997000000000000000e+03,-3.200000000000000000e+02,1.353200000000000000e+04,7.767000000000000000e+03,3.178100000000000000e+04
			
 
				+-8.852000000000000000e+03,5.386000000000000000e+03,2.109900000000000000e+04,-2.248000000000000000e+03,-2.158000000000000000e+04,-2.943000000000000000e+03,1.878200000000000000e+04,-2.715800000000000000e+04
			
 
				+-1.761000000000000000e+03,-3.015600000000000000e+04,2.276900000000000000e+04,-2.124900000000000000e+04,2.452400000000000000e+04,3.117500000000000000e+04,-2.082200000000000000e+04,-9.223000000000000000e+03
			
 
				+6.151000000000000000e+03,-1.555200000000000000e+04,2.475600000000000000e+04,2.379500000000000000e+04,4.022000000000000000e+03,1.985300000000000000e+04,-9.712000000000000000e+03,3.043000000000000000e+04
			
 
				+-1.239100000000000000e+04,-4.296000000000000000e+03,-8.929000000000000000e+03,2.153700000000000000e+04,1.479600000000000000e+04,2.714200000000000000e+04,-1.042400000000000000e+04,-1.658100000000000000e+04
			
 
				+-3.041000000000000000e+03,-1.354400000000000000e+04,-2.699200000000000000e+04,-1.889000000000000000e+04,-2.246200000000000000e+04,-6.774000000000000000e+03,1.162800000000000000e+04,1.018000000000000000e+03
			
 
				+-2.562100000000000000e+04,1.835300000000000000e+04,-1.790800000000000000e+04,-1.992200000000000000e+04,2.628500000000000000e+04,1.079000000000000000e+04,2.465500000000000000e+04,2.280200000000000000e+04
			
 
				+9.859000000000000000e+03,1.566000000000000000e+04,2.064300000000000000e+04,2.626800000000000000e+04,-2.352400000000000000e+04,-2.389500000000000000e+04,-1.138800000000000000e+04,-1.169800000000000000e+04
			
 
				+2.240800000000000000e+04,-2.975400000000000000e+04,-1.048800000000000000e+04,-3.039800000000000000e+04,1.322500000000000000e+04,1.075500000000000000e+04,-2.631600000000000000e+04,2.310200000000000000e+04
			
 
				+2.731100000000000000e+04,3.065900000000000000e+04,-3.204500000000000000e+04,-2.260000000000000000e+03,-1.119500000000000000e+04,2.319800000000000000e+04,2.089400000000000000e+04,9.002000000000000000e+03
			
 
				+2.227000000000000000e+04,1.253800000000000000e+04,2.569000000000000000e+04,1.269700000000000000e+04,-1.738700000000000000e+04,3.174000000000000000e+04,-7.549000000000000000e+03,-2.576000000000000000e+03
			
 
				+-1.084000000000000000e+04,1.429000000000000000e+04,1.577300000000000000e+04,1.222000000000000000e+04,-9.868000000000000000e+03,-3.053100000000000000e+04,1.334000000000000000e+03,8.932000000000000000e+03
			
 
				+1.043100000000000000e+04,2.807400000000000000e+04,1.510400000000000000e+04,-4.137000000000000000e+03,1.830100000000000000e+04,3.255200000000000000e+04,-4.026000000000000000e+03,-1.938800000000000000e+04
			
 
				+-1.137000000000000000e+04,3.080100000000000000e+04,-2.192800000000000000e+04,-3.470000000000000000e+03,2.481200000000000000e+04,9.177000000000000000e+03,-2.041200000000000000e+04,-3.033200000000000000e+04
			
 
				+-2.986000000000000000e+03,-3.125800000000000000e+04,-1.117000000000000000e+04,2.069100000000000000e+04,1.648200000000000000e+04,3.247800000000000000e+04,-6.432000000000000000e+03,2.083300000000000000e+04
			
 
				+5.603000000000000000e+03,-7.879000000000000000e+03,1.672100000000000000e+04,3.131100000000000000e+04,4.882000000000000000e+03,-1.684300000000000000e+04,2.834900000000000000e+04,2.640300000000000000e+04
			
 
				+1.821000000000000000e+03,1.774500000000000000e+04,1.594100000000000000e+04,-3.224100000000000000e+04,-3.146400000000000000e+04,3.107200000000000000e+04,-8.820000000000000000e+02,-7.897000000000000000e+03
			
 
				+1.638500000000000000e+04,-6.740000000000000000e+02,-1.685800000000000000e+04,-1.399200000000000000e+04,-1.724000000000000000e+04,-3.076500000000000000e+04,2.556800000000000000e+04,2.208500000000000000e+04
			
 
				+-1.887700000000000000e+04,-1.502500000000000000e+04,-2.517500000000000000e+04,-4.546000000000000000e+03,1.094100000000000000e+04,-1.128500000000000000e+04,-2.770700000000000000e+04,1.275400000000000000e+04
			
 
				+-1.500400000000000000e+04,-6.357000000000000000e+03,1.033600000000000000e+04,-2.184600000000000000e+04,2.503900000000000000e+04,-1.229400000000000000e+04,2.955300000000000000e+04,1.889200000000000000e+04
			
 
				+3.053000000000000000e+03,1.469800000000000000e+04,3.240400000000000000e+04,-2.582500000000000000e+04,-2.887000000000000000e+03,1.956400000000000000e+04,-2.360100000000000000e+04,1.285500000000000000e+04
			
 
				+-1.167400000000000000e+04,-4.663000000000000000e+03,1.953200000000000000e+04,-1.115700000000000000e+04,1.580000000000000000e+04,1.722500000000000000e+04,2.870100000000000000e+04,1.020000000000000000e+04
			
 
				+2.647200000000000000e+04,-1.569100000000000000e+04,1.359100000000000000e+04,-2.531000000000000000e+03,1.400600000000000000e+04,-1.831200000000000000e+04,2.749700000000000000e+04,1.140200000000000000e+04
			
 
				+2.788900000000000000e+04,-2.334800000000000000e+04,-1.268800000000000000e+04,-2.863900000000000000e+04,2.965300000000000000e+04,4.195000000000000000e+03,2.732700000000000000e+04,-1.248600000000000000e+04
			
 
				+2.089000000000000000e+04,5.154000000000000000e+03,2.382400000000000000e+04,1.703200000000000000e+04,2.615400000000000000e+04,1.686300000000000000e+04,3.159600000000000000e+04,1.861700000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers/kernel.txt
@@ -0,0 +1,73 @@
 
				+# 3,3,8,1
			
 
				+-3.212100000000000000e+04
			
 
				+8.822000000000000000e+03
			
 
				+-5.729000000000000000e+03
			
 
				+-7.454000000000000000e+03
			
 
				+-1.075400000000000000e+04
			
 
				+-1.434300000000000000e+04
			
 
				+2.789100000000000000e+04
			
 
				+7.376000000000000000e+03
			
 
				+-2.298200000000000000e+04
			
 
				+-2.320400000000000000e+04
			
 
				+3.272800000000000000e+04
			
 
				+-1.930600000000000000e+04
			
 
				+7.724000000000000000e+03
			
 
				+5.890000000000000000e+02
			
 
				+-7.187000000000000000e+03
			
 
				+1.671000000000000000e+04
			
 
				+2.230700000000000000e+04
			
 
				+2.753400000000000000e+04
			
 
				+-2.766500000000000000e+04
			
 
				+1.121400000000000000e+04
			
 
				+-5.097000000000000000e+03
			
 
				+9.728000000000000000e+03
			
 
				+-2.516600000000000000e+04
			
 
				+-1.773200000000000000e+04
			
 
				+-1.469600000000000000e+04
			
 
				+3.230000000000000000e+02
			
 
				+1.567900000000000000e+04
			
 
				+-1.057300000000000000e+04
			
 
				+-6.110000000000000000e+02
			
 
				+-2.566300000000000000e+04
			
 
				+-1.308300000000000000e+04
			
 
				+-1.943000000000000000e+04
			
 
				+-2.294300000000000000e+04
			
 
				+1.573000000000000000e+03
			
 
				+-2.091600000000000000e+04
			
 
				+3.089500000000000000e+04
			
 
				+-4.434000000000000000e+03
			
 
				+-6.590000000000000000e+03
			
 
				+-4.870000000000000000e+02
			
 
				+-3.074900000000000000e+04
			
 
				+1.456000000000000000e+03
			
 
				+-9.657000000000000000e+03
			
 
				+-6.790000000000000000e+02
			
 
				+-2.608300000000000000e+04
			
 
				+-7.920000000000000000e+02
			
 
				+-7.298000000000000000e+03
			
 
				+-2.479700000000000000e+04
			
 
				+-3.030300000000000000e+04
			
 
				+-3.053400000000000000e+04
			
 
				+-2.850100000000000000e+04
			
 
				+-3.049200000000000000e+04
			
 
				+-4.336000000000000000e+03
			
 
				+-2.556000000000000000e+04
			
 
				+-1.524100000000000000e+04
			
 
				+1.065200000000000000e+04
			
 
				+1.129800000000000000e+04
			
 
				+-3.019300000000000000e+04
			
 
				+-2.056700000000000000e+04
			
 
				+7.330000000000000000e+03
			
 
				+1.836800000000000000e+04
			
 
				+-1.734600000000000000e+04
			
 
				+5.206000000000000000e+03
			
 
				+1.378300000000000000e+04
			
 
				+3.211300000000000000e+04
			
 
				+-2.714100000000000000e+04
			
 
				+-3.112600000000000000e+04
			
 
				+-3.243900000000000000e+04
			
 
				+-1.042400000000000000e+04
			
 
				+3.044100000000000000e+04
			
 
				+-2.847000000000000000e+03
			
 
				+1.623400000000000000e+04
			
 
				+1.445200000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/input.txt
@@ -0,0 +1,49 @@
 
				+# 3,4,4,8
			
 
				+1.522800000000000000e+04,9.887000000000000000e+03,-1.437200000000000000e+04,-2.187100000000000000e+04,3.624000000000000000e+03,1.940400000000000000e+04,-2.475200000000000000e+04,6.792000000000000000e+03
			
 
				+2.304000000000000000e+03,1.136400000000000000e+04,-1.790400000000000000e+04,2.731200000000000000e+04,3.347000000000000000e+03,2.787000000000000000e+03,1.239000000000000000e+03,-1.933400000000000000e+04
			
 
				+-1.873600000000000000e+04,-2.987300000000000000e+04,4.513000000000000000e+03,-2.252000000000000000e+04,2.063500000000000000e+04,-2.506900000000000000e+04,-2.050600000000000000e+04,3.260400000000000000e+04
			
 
				+8.989000000000000000e+03,-2.437000000000000000e+04,-1.238600000000000000e+04,2.294200000000000000e+04,-2.392800000000000000e+04,-1.731600000000000000e+04,-1.353900000000000000e+04,5.650000000000000000e+02
			
 
				+-1.099400000000000000e+04,1.802000000000000000e+04,1.584800000000000000e+04,2.587000000000000000e+03,-2.565300000000000000e+04,-1.128900000000000000e+04,2.140100000000000000e+04,8.156000000000000000e+03
			
 
				+1.060400000000000000e+04,-1.178400000000000000e+04,6.412000000000000000e+03,1.780400000000000000e+04,-2.457900000000000000e+04,-1.187000000000000000e+04,3.088400000000000000e+04,-1.570000000000000000e+04
			
 
				+-2.448100000000000000e+04,9.393000000000000000e+03,-1.179000000000000000e+03,1.893500000000000000e+04,2.741600000000000000e+04,2.821200000000000000e+04,2.124800000000000000e+04,-2.597700000000000000e+04
			
 
				+1.553200000000000000e+04,-2.227300000000000000e+04,1.722000000000000000e+03,2.467100000000000000e+04,-1.128700000000000000e+04,1.981800000000000000e+04,-1.681500000000000000e+04,7.134000000000000000e+03
			
 
				+2.982100000000000000e+04,2.851700000000000000e+04,2.612600000000000000e+04,3.106900000000000000e+04,2.232000000000000000e+03,1.179800000000000000e+04,-3.138700000000000000e+04,-3.093000000000000000e+04
			
 
				+-3.193400000000000000e+04,-1.316400000000000000e+04,4.106000000000000000e+03,3.177600000000000000e+04,1.665000000000000000e+03,3.496000000000000000e+03,5.557000000000000000e+03,-6.483000000000000000e+03
			
 
				+2.857000000000000000e+03,-9.361000000000000000e+03,2.245500000000000000e+04,1.088000000000000000e+03,-1.185400000000000000e+04,-3.239500000000000000e+04,-3.193400000000000000e+04,-6.768000000000000000e+03
			
 
				+2.718000000000000000e+03,1.822100000000000000e+04,-1.758800000000000000e+04,-4.744000000000000000e+03,1.878500000000000000e+04,1.768000000000000000e+04,2.941000000000000000e+03,-5.660000000000000000e+02
			
 
				+3.212300000000000000e+04,-9.715000000000000000e+03,-2.802400000000000000e+04,2.229700000000000000e+04,-1.389400000000000000e+04,2.319500000000000000e+04,-2.969200000000000000e+04,-2.499900000000000000e+04
			
 
				+3.389000000000000000e+03,-1.652300000000000000e+04,2.056000000000000000e+03,8.969000000000000000e+03,2.437200000000000000e+04,-2.940600000000000000e+04,1.940600000000000000e+04,5.839000000000000000e+03
			
 
				+-1.068700000000000000e+04,1.184600000000000000e+04,-9.750000000000000000e+03,3.677000000000000000e+03,3.128900000000000000e+04,4.790000000000000000e+03,2.033400000000000000e+04,3.979000000000000000e+03
			
 
				+5.898000000000000000e+03,2.464900000000000000e+04,1.681500000000000000e+04,-2.610000000000000000e+02,-1.015200000000000000e+04,-4.572000000000000000e+03,-3.103300000000000000e+04,3.050800000000000000e+04
			
 
				+1.251000000000000000e+04,2.435600000000000000e+04,1.613100000000000000e+04,-7.928000000000000000e+03,-4.224000000000000000e+03,-1.417600000000000000e+04,1.498000000000000000e+03,-2.097600000000000000e+04
			
 
				+-2.602200000000000000e+04,5.688000000000000000e+03,1.932900000000000000e+04,8.434000000000000000e+03,-3.564000000000000000e+03,1.820000000000000000e+02,-3.045600000000000000e+04,-3.243000000000000000e+03
			
 
				+-1.343900000000000000e+04,2.203000000000000000e+03,-3.265900000000000000e+04,2.699600000000000000e+04,-2.750800000000000000e+04,-1.428600000000000000e+04,-1.928100000000000000e+04,-9.613000000000000000e+03
			
 
				+5.932000000000000000e+03,-5.671000000000000000e+03,-1.911000000000000000e+03,-2.105700000000000000e+04,1.667100000000000000e+04,-2.410300000000000000e+04,-5.186000000000000000e+03,7.524000000000000000e+03
			
 
				+-2.810000000000000000e+04,-1.089200000000000000e+04,-2.059800000000000000e+04,-1.559800000000000000e+04,-1.587500000000000000e+04,-3.088000000000000000e+04,-1.384100000000000000e+04,-2.882700000000000000e+04
			
 
				+-1.433300000000000000e+04,2.718100000000000000e+04,3.003500000000000000e+04,-3.785000000000000000e+03,3.131200000000000000e+04,-2.263000000000000000e+03,4.563000000000000000e+03,2.323000000000000000e+04
			
 
				+-1.201900000000000000e+04,-2.596200000000000000e+04,2.824500000000000000e+04,-1.616500000000000000e+04,4.083000000000000000e+03,-1.462900000000000000e+04,-8.474000000000000000e+03,-2.576100000000000000e+04
			
 
				+6.290000000000000000e+02,-2.307500000000000000e+04,-1.649600000000000000e+04,7.769000000000000000e+03,-1.865200000000000000e+04,2.647000000000000000e+04,1.364500000000000000e+04,9.974000000000000000e+03
			
 
				+-1.649500000000000000e+04,2.921700000000000000e+04,5.501000000000000000e+03,-9.689000000000000000e+03,-2.906600000000000000e+04,-2.250400000000000000e+04,-4.127000000000000000e+03,1.546200000000000000e+04
			
 
				+7.312000000000000000e+03,2.683200000000000000e+04,-2.886000000000000000e+03,2.164100000000000000e+04,-3.005300000000000000e+04,-5.761000000000000000e+03,-1.546100000000000000e+04,-2.473900000000000000e+04
			
 
				+1.517700000000000000e+04,-2.636200000000000000e+04,-2.965500000000000000e+04,-1.492500000000000000e+04,-1.917600000000000000e+04,-1.100900000000000000e+04,-1.078000000000000000e+04,-1.755000000000000000e+03
			
 
				+2.599400000000000000e+04,8.118000000000000000e+03,2.588800000000000000e+04,-1.022300000000000000e+04,-3.459000000000000000e+03,4.649000000000000000e+03,3.127700000000000000e+04,-2.472300000000000000e+04
			
 
				+-2.373300000000000000e+04,1.975900000000000000e+04,1.071300000000000000e+04,-1.487900000000000000e+04,-1.656300000000000000e+04,2.675000000000000000e+04,-3.169600000000000000e+04,1.746700000000000000e+04
			
 
				+6.851000000000000000e+03,3.261400000000000000e+04,2.414000000000000000e+03,-1.464000000000000000e+04,1.449000000000000000e+03,3.156200000000000000e+04,2.416000000000000000e+03,-6.229000000000000000e+03
			
 
				+-3.024400000000000000e+04,1.087200000000000000e+04,-2.374200000000000000e+04,1.354600000000000000e+04,-5.255000000000000000e+03,-9.880000000000000000e+02,-2.911100000000000000e+04,3.177700000000000000e+04
			
 
				+-1.427000000000000000e+04,-2.342200000000000000e+04,-3.047900000000000000e+04,7.836000000000000000e+03,-2.478800000000000000e+04,2.743800000000000000e+04,1.671200000000000000e+04,5.935000000000000000e+03
			
 
				+-8.200000000000000000e+01,2.890000000000000000e+03,3.254200000000000000e+04,-8.496000000000000000e+03,-3.049800000000000000e+04,-3.060800000000000000e+04,8.851000000000000000e+03,6.119000000000000000e+03
			
 
				+3.074200000000000000e+04,1.811500000000000000e+04,-1.902800000000000000e+04,-3.732000000000000000e+03,-1.490200000000000000e+04,-1.774400000000000000e+04,-2.804000000000000000e+03,-6.760000000000000000e+03
			
 
				+3.115800000000000000e+04,-8.253000000000000000e+03,7.319000000000000000e+03,-1.169000000000000000e+04,1.700800000000000000e+04,1.537100000000000000e+04,3.112000000000000000e+03,3.248100000000000000e+04
			
 
				+6.340000000000000000e+02,3.029700000000000000e+04,3.072400000000000000e+04,6.967000000000000000e+03,-2.535700000000000000e+04,-2.806300000000000000e+04,2.002900000000000000e+04,-1.695400000000000000e+04
			
 
				+7.477000000000000000e+03,1.602600000000000000e+04,1.492100000000000000e+04,2.960100000000000000e+04,1.152500000000000000e+04,-1.368300000000000000e+04,-4.390000000000000000e+02,-2.803400000000000000e+04
			
 
				+-1.273600000000000000e+04,6.472000000000000000e+03,-6.050000000000000000e+03,2.662000000000000000e+03,-2.510500000000000000e+04,-1.032800000000000000e+04,-1.931400000000000000e+04,2.402000000000000000e+03
			
 
				+2.514300000000000000e+04,-1.959600000000000000e+04,-8.183000000000000000e+03,2.180600000000000000e+04,8.676000000000000000e+03,-1.240800000000000000e+04,-2.369200000000000000e+04,-2.126400000000000000e+04
			
 
				+-1.554000000000000000e+04,-1.654800000000000000e+04,-3.244400000000000000e+04,1.719200000000000000e+04,-2.125400000000000000e+04,3.030900000000000000e+04,1.308300000000000000e+04,1.742500000000000000e+04
			
 
				+3.059400000000000000e+04,-2.528700000000000000e+04,-9.130000000000000000e+03,2.573700000000000000e+04,-1.486200000000000000e+04,-8.582000000000000000e+03,-1.521000000000000000e+03,2.762700000000000000e+04
			
 
				+2.648600000000000000e+04,-1.967800000000000000e+04,2.829900000000000000e+04,-1.748900000000000000e+04,1.976900000000000000e+04,-7.630000000000000000e+02,1.531100000000000000e+04,1.440100000000000000e+04
			
 
				+-3.107900000000000000e+04,1.774700000000000000e+04,5.782000000000000000e+03,-2.142100000000000000e+04,-9.977000000000000000e+03,-2.059000000000000000e+03,1.768600000000000000e+04,2.904600000000000000e+04
			
 
				+5.543000000000000000e+03,-1.058300000000000000e+04,-7.955000000000000000e+03,-3.090000000000000000e+02,7.523000000000000000e+03,-1.984300000000000000e+04,-1.641800000000000000e+04,1.120000000000000000e+03
			
 
				+3.568000000000000000e+03,1.452900000000000000e+04,1.045700000000000000e+04,-1.939100000000000000e+04,-2.413900000000000000e+04,-3.775000000000000000e+03,2.893000000000000000e+03,-1.216800000000000000e+04
			
 
				+-3.228600000000000000e+04,-1.538600000000000000e+04,-2.795500000000000000e+04,6.214000000000000000e+03,2.296300000000000000e+04,7.639000000000000000e+03,8.214000000000000000e+03,-2.776500000000000000e+04
			
 
				+7.540000000000000000e+02,-2.588500000000000000e+04,-2.242400000000000000e+04,2.554900000000000000e+04,-3.149800000000000000e+04,-1.718300000000000000e+04,-2.830000000000000000e+03,2.030900000000000000e+04
			
 
				+6.080000000000000000e+03,-1.375000000000000000e+04,2.449000000000000000e+03,-2.875600000000000000e+04,-2.349300000000000000e+04,-2.282200000000000000e+04,2.341800000000000000e+04,2.132000000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/kernel.txt
@@ -0,0 +1,49 @@
 
				+# 2,3,8,1
			
 
				+1.687400000000000000e+04
			
 
				+-3.052000000000000000e+03
			
 
				+-2.187000000000000000e+04
			
 
				+2.320500000000000000e+04
			
 
				+-2.211400000000000000e+04
			
 
				+-4.300000000000000000e+03
			
 
				+2.142800000000000000e+04
			
 
				+2.061200000000000000e+04
			
 
				+1.543000000000000000e+03
			
 
				+-1.133500000000000000e+04
			
 
				+-1.322700000000000000e+04
			
 
				+1.866800000000000000e+04
			
 
				+2.069200000000000000e+04
			
 
				+-1.480400000000000000e+04
			
 
				+1.228800000000000000e+04
			
 
				+-2.144400000000000000e+04
			
 
				+-1.433200000000000000e+04
			
 
				+1.148200000000000000e+04
			
 
				+1.150700000000000000e+04
			
 
				+3.079100000000000000e+04
			
 
				+9.215000000000000000e+03
			
 
				+-1.604800000000000000e+04
			
 
				+-2.695000000000000000e+03
			
 
				+3.236200000000000000e+04
			
 
				+-1.708800000000000000e+04
			
 
				+-9.248000000000000000e+03
			
 
				+2.567700000000000000e+04
			
 
				+-2.467300000000000000e+04
			
 
				+-1.088800000000000000e+04
			
 
				+4.489000000000000000e+03
			
 
				+-2.311000000000000000e+03
			
 
				+5.757000000000000000e+03
			
 
				+-3.092400000000000000e+04
			
 
				+-9.495000000000000000e+03
			
 
				+7.470000000000000000e+02
			
 
				+5.188000000000000000e+03
			
 
				+-3.144800000000000000e+04
			
 
				+-2.247800000000000000e+04
			
 
				+1.640600000000000000e+04
			
 
				+1.274700000000000000e+04
			
 
				+-4.634000000000000000e+03
			
 
				+-8.142000000000000000e+03
			
 
				+-5.032000000000000000e+03
			
 
				+-2.549000000000000000e+03
			
 
				+1.860000000000000000e+02
			
 
				+9.170000000000000000e+03
			
 
				+3.372000000000000000e+03
			
 
				+-1.727500000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_null_bias/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_null_bias/input.txt
@@ -0,0 +1,17 @@
 
				+# 1,4,4,8
			
 
				+-1.857700000000000000e+04,2.208200000000000000e+04,1.901500000000000000e+04,9.939000000000000000e+03,-9.865000000000000000e+03,-2.051200000000000000e+04,-9.998000000000000000e+03,-2.266500000000000000e+04
			
 
				+-2.129000000000000000e+04,-1.967200000000000000e+04,4.065000000000000000e+03,-1.061700000000000000e+04,1.641000000000000000e+04,3.161100000000000000e+04,-7.291000000000000000e+03,2.192700000000000000e+04
			
 
				+-1.942000000000000000e+04,4.760000000000000000e+02,-2.439500000000000000e+04,9.624000000000000000e+03,2.733000000000000000e+04,7.170000000000000000e+02,7.899000000000000000e+03,5.117000000000000000e+03
			
 
				+2.449000000000000000e+04,-3.250700000000000000e+04,1.771000000000000000e+03,-9.298000000000000000e+03,3.005900000000000000e+04,4.552000000000000000e+03,4.894000000000000000e+03,3.098300000000000000e+04
			
 
				+2.786800000000000000e+04,-4.336000000000000000e+03,2.837700000000000000e+04,-3.259900000000000000e+04,-2.879500000000000000e+04,-1.817900000000000000e+04,1.406800000000000000e+04,1.004800000000000000e+04
			
 
				+-1.337200000000000000e+04,-1.781300000000000000e+04,5.292000000000000000e+03,5.836000000000000000e+03,2.725700000000000000e+04,-1.773000000000000000e+04,2.258000000000000000e+04,3.089700000000000000e+04
			
 
				+2.483700000000000000e+04,3.140700000000000000e+04,-3.356000000000000000e+03,-2.739400000000000000e+04,2.332400000000000000e+04,-1.121000000000000000e+03,3.040500000000000000e+04,1.250600000000000000e+04
			
 
				+-5.824000000000000000e+03,1.733300000000000000e+04,2.131600000000000000e+04,-3.119000000000000000e+03,1.214200000000000000e+04,-1.581200000000000000e+04,-8.362000000000000000e+03,1.348100000000000000e+04
			
 
				+8.010000000000000000e+03,-1.640900000000000000e+04,-1.384000000000000000e+03,-9.159000000000000000e+03,-1.792900000000000000e+04,-2.046900000000000000e+04,5.666000000000000000e+03,7.685000000000000000e+03
			
 
				+2.171500000000000000e+04,8.290000000000000000e+03,-1.860500000000000000e+04,2.908700000000000000e+04,2.482700000000000000e+04,-7.327000000000000000e+03,-3.181800000000000000e+04,-2.473100000000000000e+04
			
 
				+2.892000000000000000e+04,-6.734000000000000000e+03,3.144300000000000000e+04,-2.070000000000000000e+03,-8.030000000000000000e+03,2.869000000000000000e+03,-2.698200000000000000e+04,1.526900000000000000e+04
			
 
				+1.048100000000000000e+04,1.358200000000000000e+04,-3.042600000000000000e+04,4.356000000000000000e+03,-1.475800000000000000e+04,-1.125100000000000000e+04,1.597300000000000000e+04,1.967500000000000000e+04
			
 
				+-2.068500000000000000e+04,8.198000000000000000e+03,-2.278100000000000000e+04,1.346900000000000000e+04,-2.744000000000000000e+04,3.354000000000000000e+03,-2.895300000000000000e+04,2.339800000000000000e+04
			
 
				+2.490000000000000000e+04,2.579200000000000000e+04,-2.324400000000000000e+04,-2.801000000000000000e+03,-9.152000000000000000e+03,1.770200000000000000e+04,2.128500000000000000e+04,-2.386400000000000000e+04
			
 
				+-1.123100000000000000e+04,-1.282600000000000000e+04,-2.178000000000000000e+03,-2.346200000000000000e+04,-9.906000000000000000e+03,8.555000000000000000e+03,-6.807000000000000000e+03,-3.570000000000000000e+02
			
 
				+-2.388200000000000000e+04,-3.225000000000000000e+04,2.946600000000000000e+04,1.825500000000000000e+04,1.583600000000000000e+04,-2.913500000000000000e+04,-1.872400000000000000e+04,-2.574300000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_null_bias/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_null_bias/kernel.txt
@@ -0,0 +1,33 @@
 
				+# 2,2,8,1
			
 
				+-6.966000000000000000e+03
			
 
				+-6.832000000000000000e+03
			
 
				+1.216900000000000000e+04
			
 
				+3.284000000000000000e+03
			
 
				+-2.968100000000000000e+04
			
 
				+2.983400000000000000e+04
			
 
				+1.598500000000000000e+04
			
 
				+2.407200000000000000e+04
			
 
				+-2.409500000000000000e+04
			
 
				+-9.319000000000000000e+03
			
 
				+1.043500000000000000e+04
			
 
				+1.787700000000000000e+04
			
 
				+1.350500000000000000e+04
			
 
				+3.029500000000000000e+04
			
 
				+9.635000000000000000e+03
			
 
				+2.507400000000000000e+04
			
 
				+-2.418500000000000000e+04
			
 
				+1.819200000000000000e+04
			
 
				+2.434400000000000000e+04
			
 
				+-3.168100000000000000e+04
			
 
				+2.931800000000000000e+04
			
 
				+-2.652300000000000000e+04
			
 
				+-1.711800000000000000e+04
			
 
				+-4.391000000000000000e+03
			
 
				+1.497000000000000000e+03
			
 
				+2.695800000000000000e+04
			
 
				+9.153000000000000000e+03
			
 
				+-6.281000000000000000e+03
			
 
				+3.063400000000000000e+04
			
 
				+-1.109700000000000000e+04
			
 
				+2.385300000000000000e+04
			
 
				+-2.211300000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_spill_null_bias/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_spill_null_bias/input.txt
@@ -0,0 +1,49 @@
 
				+# 3,4,4,5
			
 
				+-7.319000000000000000e+03,2.429900000000000000e+04,-2.701500000000000000e+04,1.816400000000000000e+04,-2.412800000000000000e+04
			
 
				+1.886000000000000000e+03,-6.480000000000000000e+03,3.192200000000000000e+04,-3.276700000000000000e+04,3.153900000000000000e+04
			
 
				+-1.918800000000000000e+04,-2.647800000000000000e+04,-2.564000000000000000e+03,2.787300000000000000e+04,-5.675000000000000000e+03
			
 
				+7.433000000000000000e+03,3.115900000000000000e+04,-1.389800000000000000e+04,5.334000000000000000e+03,1.596200000000000000e+04
			
 
				+-1.190000000000000000e+04,-1.670800000000000000e+04,5.430000000000000000e+03,-9.333000000000000000e+03,-2.557000000000000000e+04
			
 
				+2.519100000000000000e+04,3.118300000000000000e+04,2.879800000000000000e+04,1.172400000000000000e+04,3.145200000000000000e+04
			
 
				+-1.654600000000000000e+04,3.181200000000000000e+04,-3.131300000000000000e+04,-1.796800000000000000e+04,-3.231000000000000000e+03
			
 
				+-2.941400000000000000e+04,8.278000000000000000e+03,3.366000000000000000e+03,-1.947800000000000000e+04,9.861000000000000000e+03
			
 
				+2.360000000000000000e+04,3.141400000000000000e+04,1.224000000000000000e+03,-3.213900000000000000e+04,2.826200000000000000e+04
			
 
				+-2.621500000000000000e+04,3.336000000000000000e+03,1.701100000000000000e+04,-2.727300000000000000e+04,-2.001300000000000000e+04
			
 
				+-2.811100000000000000e+04,2.001200000000000000e+04,6.270000000000000000e+03,-1.694900000000000000e+04,-4.120000000000000000e+03
			
 
				+1.989200000000000000e+04,1.224400000000000000e+04,-2.741400000000000000e+04,-2.561500000000000000e+04,1.869600000000000000e+04
			
 
				+-2.366000000000000000e+04,-2.076800000000000000e+04,2.692200000000000000e+04,3.737000000000000000e+03,1.541900000000000000e+04
			
 
				+5.561000000000000000e+03,-2.868000000000000000e+03,-1.664100000000000000e+04,8.907000000000000000e+03,1.355000000000000000e+04
			
 
				+3.214100000000000000e+04,-1.790400000000000000e+04,-3.154000000000000000e+04,-2.679300000000000000e+04,2.555200000000000000e+04
			
 
				+2.885600000000000000e+04,3.157600000000000000e+04,6.202000000000000000e+03,-1.488800000000000000e+04,2.376200000000000000e+04
			
 
				+-8.198000000000000000e+03,9.875000000000000000e+03,-1.312600000000000000e+04,-1.693100000000000000e+04,-9.526000000000000000e+03
			
 
				+-1.605500000000000000e+04,2.672700000000000000e+04,2.704100000000000000e+04,-1.252900000000000000e+04,8.293000000000000000e+03
			
 
				+5.669000000000000000e+03,6.974000000000000000e+03,5.096000000000000000e+03,2.317300000000000000e+04,7.041000000000000000e+03
			
 
				+-2.314000000000000000e+04,-1.832100000000000000e+04,3.989000000000000000e+03,2.079700000000000000e+04,-2.613900000000000000e+04
			
 
				+-1.948000000000000000e+04,1.581800000000000000e+04,-3.253000000000000000e+03,-3.240100000000000000e+04,1.478900000000000000e+04
			
 
				+-2.821500000000000000e+04,2.850300000000000000e+04,7.393000000000000000e+03,1.232500000000000000e+04,7.971000000000000000e+03
			
 
				+-2.889600000000000000e+04,-2.121300000000000000e+04,-2.173900000000000000e+04,3.057600000000000000e+04,-1.013500000000000000e+04
			
 
				+4.547000000000000000e+03,-1.594000000000000000e+03,3.162700000000000000e+04,-7.316000000000000000e+03,-1.917000000000000000e+03
			
 
				+3.168500000000000000e+04,1.691700000000000000e+04,2.938700000000000000e+04,1.745300000000000000e+04,-2.403000000000000000e+04
			
 
				+-2.852600000000000000e+04,-2.540300000000000000e+04,1.647300000000000000e+04,-3.129400000000000000e+04,-2.136600000000000000e+04
			
 
				+2.998900000000000000e+04,2.488300000000000000e+04,-4.928000000000000000e+03,2.724100000000000000e+04,8.780000000000000000e+03
			
 
				+-1.027000000000000000e+03,-1.847200000000000000e+04,-1.963400000000000000e+04,3.790000000000000000e+02,1.293800000000000000e+04
			
 
				+-1.043600000000000000e+04,-1.728600000000000000e+04,2.427400000000000000e+04,2.109500000000000000e+04,-1.127200000000000000e+04
			
 
				+1.912700000000000000e+04,-2.943700000000000000e+04,2.608100000000000000e+04,2.487000000000000000e+03,3.074000000000000000e+03
			
 
				+-2.159200000000000000e+04,-3.022500000000000000e+04,2.899000000000000000e+03,-1.151200000000000000e+04,-2.633600000000000000e+04
			
 
				+-1.193600000000000000e+04,-1.892500000000000000e+04,-2.196100000000000000e+04,-2.422500000000000000e+04,-3.209200000000000000e+04
			
 
				+-7.743000000000000000e+03,2.801700000000000000e+04,7.708000000000000000e+03,3.289000000000000000e+03,1.363200000000000000e+04
			
 
				+2.982500000000000000e+04,-2.397000000000000000e+03,1.392700000000000000e+04,1.681300000000000000e+04,1.546500000000000000e+04
			
 
				+2.567000000000000000e+03,8.863000000000000000e+03,2.010600000000000000e+04,2.346700000000000000e+04,3.134400000000000000e+04
			
 
				+-5.870000000000000000e+02,1.389200000000000000e+04,5.223000000000000000e+03,3.213800000000000000e+04,2.198200000000000000e+04
			
 
				+-1.310000000000000000e+03,2.106000000000000000e+03,-1.471900000000000000e+04,1.361000000000000000e+03,-6.400000000000000000e+03
			
 
				+-1.569700000000000000e+04,-2.556900000000000000e+04,4.561000000000000000e+03,1.776600000000000000e+04,1.316200000000000000e+04
			
 
				+1.501100000000000000e+04,1.904000000000000000e+04,2.914200000000000000e+04,9.900000000000000000e+03,-5.609000000000000000e+03
			
 
				+2.046700000000000000e+04,1.415500000000000000e+04,5.410000000000000000e+03,9.317000000000000000e+03,2.058900000000000000e+04
			
 
				+-3.171600000000000000e+04,2.989100000000000000e+04,-1.812200000000000000e+04,1.178500000000000000e+04,3.168300000000000000e+04
			
 
				+-6.612000000000000000e+03,-1.952200000000000000e+04,-3.132000000000000000e+04,8.707000000000000000e+03,-1.241200000000000000e+04
			
 
				+3.052500000000000000e+04,-4.086000000000000000e+03,-1.875200000000000000e+04,3.171100000000000000e+04,-1.020800000000000000e+04
			
 
				+2.101600000000000000e+04,-7.797000000000000000e+03,1.253900000000000000e+04,3.207500000000000000e+04,-2.303400000000000000e+04
			
 
				+2.483000000000000000e+04,-1.619600000000000000e+04,2.850700000000000000e+04,3.044400000000000000e+04,-7.734000000000000000e+03
			
 
				+-2.571900000000000000e+04,2.208200000000000000e+04,1.941100000000000000e+04,-2.953500000000000000e+04,-2.174600000000000000e+04
			
 
				+-2.661200000000000000e+04,-2.151400000000000000e+04,-2.395400000000000000e+04,-2.084400000000000000e+04,-2.825400000000000000e+04
			
 
				+1.724700000000000000e+04,-2.944400000000000000e+04,3.099600000000000000e+04,-6.366000000000000000e+03,1.848900000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_spill_null_bias/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_spill_null_bias/kernel.txt
@@ -0,0 +1,46 @@
 
				+# 3,3,5,1
			
 
				+-2.947300000000000000e+04
			
 
				+2.344700000000000000e+04
			
 
				+3.034100000000000000e+04
			
 
				+8.450000000000000000e+03
			
 
				+2.395000000000000000e+04
			
 
				+2.188200000000000000e+04
			
 
				+-2.413400000000000000e+04
			
 
				+-1.370400000000000000e+04
			
 
				+1.579600000000000000e+04
			
 
				+-3.169700000000000000e+04
			
 
				+3.029900000000000000e+04
			
 
				+-1.318400000000000000e+04
			
 
				+-2.559000000000000000e+03
			
 
				+5.593000000000000000e+03
			
 
				+1.061100000000000000e+04
			
 
				+-2.498700000000000000e+04
			
 
				+-2.823000000000000000e+04
			
 
				+8.110000000000000000e+02
			
 
				+9.135000000000000000e+03
			
 
				+-7.581000000000000000e+03
			
 
				+9.142000000000000000e+03
			
 
				+5.179000000000000000e+03
			
 
				+1.367200000000000000e+04
			
 
				+1.369300000000000000e+04
			
 
				+2.017400000000000000e+04
			
 
				+-2.105600000000000000e+04
			
 
				+8.654000000000000000e+03
			
 
				+3.145000000000000000e+03
			
 
				+3.022700000000000000e+04
			
 
				+1.797400000000000000e+04
			
 
				+2.311000000000000000e+04
			
 
				+5.464000000000000000e+03
			
 
				+2.594700000000000000e+04
			
 
				+1.503900000000000000e+04
			
 
				+1.574600000000000000e+04
			
 
				+2.271700000000000000e+04
			
 
				+-3.250200000000000000e+04
			
 
				+-2.328400000000000000e+04
			
 
				+-1.325300000000000000e+04
			
 
				+1.076500000000000000e+04
			
 
				+-1.993500000000000000e+04
			
 
				+-1.922800000000000000e+04
			
 
				+6.842000000000000000e+03
			
 
				+-1.644500000000000000e+04
			
 
				+-3.141600000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_stride_null_bias/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_stride_null_bias/input.txt
@@ -0,0 +1,33 @@
 
				+# 2,4,4,8
			
 
				+1.336300000000000000e+04,-2.706200000000000000e+04,1.368500000000000000e+04,-1.925600000000000000e+04,1.338600000000000000e+04,-2.481400000000000000e+04,-1.664100000000000000e+04,-8.110000000000000000e+03
			
 
				+2.870700000000000000e+04,2.327000000000000000e+03,4.496000000000000000e+03,-2.801700000000000000e+04,-9.455000000000000000e+03,1.201400000000000000e+04,2.666000000000000000e+03,2.834500000000000000e+04
			
 
				+-2.254400000000000000e+04,6.081000000000000000e+03,-2.147300000000000000e+04,-1.989700000000000000e+04,-3.191200000000000000e+04,2.458900000000000000e+04,1.586000000000000000e+03,3.373000000000000000e+03
			
 
				+2.989500000000000000e+04,1.706100000000000000e+04,5.695000000000000000e+03,-2.813000000000000000e+04,2.018100000000000000e+04,1.643400000000000000e+04,2.913000000000000000e+03,1.169500000000000000e+04
			
 
				+-3.875000000000000000e+03,-2.955000000000000000e+04,-3.478000000000000000e+03,-7.687000000000000000e+03,-2.597800000000000000e+04,-2.148400000000000000e+04,2.030500000000000000e+04,2.766800000000000000e+04
			
 
				+-1.682100000000000000e+04,1.058800000000000000e+04,1.162400000000000000e+04,4.562000000000000000e+03,6.732000000000000000e+03,-2.422800000000000000e+04,-9.850000000000000000e+02,-1.547600000000000000e+04
			
 
				+-1.177800000000000000e+04,2.239500000000000000e+04,1.981000000000000000e+03,-4.412000000000000000e+03,-2.537600000000000000e+04,2.777400000000000000e+04,5.225000000000000000e+03,-8.601000000000000000e+03
			
 
				+-6.840000000000000000e+03,1.011000000000000000e+04,-8.133000000000000000e+03,-2.168000000000000000e+03,-2.262800000000000000e+04,-3.124900000000000000e+04,-1.999100000000000000e+04,3.760000000000000000e+03
			
 
				+1.211600000000000000e+04,1.338800000000000000e+04,-1.854800000000000000e+04,1.526100000000000000e+04,-2.820900000000000000e+04,1.766700000000000000e+04,-8.684000000000000000e+03,-8.110000000000000000e+03
			
 
				+8.420000000000000000e+02,1.476000000000000000e+04,-2.639200000000000000e+04,-2.242200000000000000e+04,1.630300000000000000e+04,-1.095400000000000000e+04,-2.298800000000000000e+04,6.944000000000000000e+03
			
 
				+1.338400000000000000e+04,-8.538000000000000000e+03,2.478500000000000000e+04,-7.726000000000000000e+03,-3.206100000000000000e+04,-1.616700000000000000e+04,-9.307000000000000000e+03,2.114900000000000000e+04
			
 
				+1.638800000000000000e+04,-8.987000000000000000e+03,-2.238900000000000000e+04,2.333600000000000000e+04,2.632400000000000000e+04,-7.298000000000000000e+03,-1.759500000000000000e+04,3.232400000000000000e+04
			
 
				+2.517000000000000000e+04,2.251200000000000000e+04,-2.032000000000000000e+04,2.286100000000000000e+04,8.725000000000000000e+03,-5.506000000000000000e+03,1.077500000000000000e+04,-3.224800000000000000e+04
			
 
				+8.500000000000000000e+03,2.927000000000000000e+03,1.117900000000000000e+04,-2.547200000000000000e+04,-1.960000000000000000e+04,-1.121700000000000000e+04,-1.463800000000000000e+04,1.395500000000000000e+04
			
 
				+1.035600000000000000e+04,2.676200000000000000e+04,-2.915900000000000000e+04,2.290400000000000000e+04,-4.691000000000000000e+03,-4.378000000000000000e+03,2.274400000000000000e+04,1.680900000000000000e+04
			
 
				+-1.386100000000000000e+04,-2.473000000000000000e+04,-1.753000000000000000e+03,-1.451100000000000000e+04,-1.769300000000000000e+04,5.360000000000000000e+02,1.392700000000000000e+04,1.868000000000000000e+03
			
 
				+-9.317000000000000000e+03,-1.982800000000000000e+04,5.988000000000000000e+03,9.475000000000000000e+03,2.110100000000000000e+04,3.047500000000000000e+04,1.326000000000000000e+04,-2.810000000000000000e+02
			
 
				+-1.622500000000000000e+04,-2.670800000000000000e+04,2.092200000000000000e+04,-1.711000000000000000e+04,2.963900000000000000e+04,1.584700000000000000e+04,2.953400000000000000e+04,2.710500000000000000e+04
			
 
				+-2.444100000000000000e+04,-5.009000000000000000e+03,-1.065000000000000000e+03,1.174500000000000000e+04,-1.548700000000000000e+04,2.051500000000000000e+04,2.565100000000000000e+04,2.351500000000000000e+04
			
 
				+4.366000000000000000e+03,9.555000000000000000e+03,-3.067200000000000000e+04,-9.989000000000000000e+03,2.319600000000000000e+04,-1.354900000000000000e+04,-5.370000000000000000e+03,2.538700000000000000e+04
			
 
				+1.283000000000000000e+03,-9.193000000000000000e+03,1.219300000000000000e+04,-2.487500000000000000e+04,-1.239400000000000000e+04,-2.455500000000000000e+04,2.119300000000000000e+04,2.504000000000000000e+03
			
 
				+-1.356200000000000000e+04,3.829000000000000000e+03,-9.883000000000000000e+03,3.917000000000000000e+03,-9.066000000000000000e+03,2.914700000000000000e+04,2.792200000000000000e+04,1.172700000000000000e+04
			
 
				+-3.058800000000000000e+04,1.160400000000000000e+04,2.216300000000000000e+04,-3.817000000000000000e+03,-3.116800000000000000e+04,2.912900000000000000e+04,-1.960600000000000000e+04,1.821000000000000000e+04
			
 
				+-6.665000000000000000e+03,-3.077400000000000000e+04,-1.956000000000000000e+03,2.248300000000000000e+04,-2.159100000000000000e+04,2.337000000000000000e+04,2.919000000000000000e+03,-1.680200000000000000e+04
			
 
				+3.096300000000000000e+04,2.643100000000000000e+04,3.124500000000000000e+04,1.514800000000000000e+04,2.264500000000000000e+04,1.259300000000000000e+04,1.413000000000000000e+03,1.048700000000000000e+04
			
 
				+1.287900000000000000e+04,-1.117700000000000000e+04,4.104000000000000000e+03,-5.148000000000000000e+03,-1.215800000000000000e+04,-8.878000000000000000e+03,1.300700000000000000e+04,1.613600000000000000e+04
			
 
				+9.350000000000000000e+03,-6.840000000000000000e+03,3.851000000000000000e+03,-2.257700000000000000e+04,2.656000000000000000e+03,-2.970600000000000000e+04,-1.898700000000000000e+04,3.223100000000000000e+04
			
 
				+2.906300000000000000e+04,-1.620700000000000000e+04,2.515900000000000000e+04,2.324200000000000000e+04,3.275300000000000000e+04,2.673800000000000000e+04,2.373200000000000000e+04,-1.251500000000000000e+04
			
 
				+1.595300000000000000e+04,3.253000000000000000e+04,-2.427700000000000000e+04,-8.282000000000000000e+03,-4.189000000000000000e+03,-3.245900000000000000e+04,2.595300000000000000e+04,-1.848500000000000000e+04
			
 
				+1.700300000000000000e+04,9.567000000000000000e+03,-4.648000000000000000e+03,-2.588300000000000000e+04,6.010000000000000000e+02,-7.937000000000000000e+03,-2.580800000000000000e+04,-1.179300000000000000e+04
			
 
				+-1.749000000000000000e+03,-1.667000000000000000e+03,1.763900000000000000e+04,-9.424000000000000000e+03,-7.337000000000000000e+03,1.054900000000000000e+04,3.060700000000000000e+04,-1.617400000000000000e+04
			
 
				+7.250000000000000000e+02,9.412000000000000000e+03,2.952200000000000000e+04,-1.288000000000000000e+03,-3.164700000000000000e+04,8.673000000000000000e+03,-3.100800000000000000e+04,4.926000000000000000e+03
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_stride_null_bias/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_stride_null_bias/kernel.txt
@@ -0,0 +1,33 @@
 
				+# 2,2,8,1
			
 
				+-1.239900000000000000e+04
			
 
				+2.623300000000000000e+04
			
 
				+-1.829900000000000000e+04
			
 
				+-2.663200000000000000e+04
			
 
				+8.750000000000000000e+02
			
 
				+-2.812100000000000000e+04
			
 
				+2.181200000000000000e+04
			
 
				+-1.189000000000000000e+03
			
 
				+8.809000000000000000e+03
			
 
				+-9.199000000000000000e+03
			
 
				+-1.871100000000000000e+04
			
 
				+3.102000000000000000e+04
			
 
				+-1.085100000000000000e+04
			
 
				+-1.214700000000000000e+04
			
 
				+3.160900000000000000e+04
			
 
				+2.392100000000000000e+04
			
 
				+-1.666400000000000000e+04
			
 
				+-8.508000000000000000e+03
			
 
				+-3.058000000000000000e+04
			
 
				+-2.020400000000000000e+04
			
 
				+1.101600000000000000e+04
			
 
				+-1.556800000000000000e+04
			
 
				+1.198500000000000000e+04
			
 
				+-1.217000000000000000e+03
			
 
				+-2.224500000000000000e+04
			
 
				+-1.610800000000000000e+04
			
 
				+3.009200000000000000e+04
			
 
				+-1.967900000000000000e+04
			
 
				+1.926800000000000000e+04
			
 
				+3.742000000000000000e+03
			
 
				+1.106700000000000000e+04
			
 
				+9.852000000000000000e+03
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/bias.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/bias.txt
@@ -0,0 +1,2 @@
 
				+# 8
			
 
				+-5.999000000000000000e+03,-3.030800000000000000e+04,8.504000000000000000e+03,-1.410900000000000000e+04,8.216000000000000000e+03,-2.707500000000000000e+04,1.136300000000000000e+04,-9.895000000000000000e+03
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/input.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/input.txt
@@ -0,0 +1,17 @@
 
				+# 1,4,4,8
			
 
				+2.621100000000000000e+04,4.731000000000000000e+03,-2.285900000000000000e+04,1.169700000000000000e+04,1.058500000000000000e+04,1.194300000000000000e+04,-1.410000000000000000e+04,2.691400000000000000e+04
			
 
				+1.231800000000000000e+04,2.586500000000000000e+04,2.130900000000000000e+04,-3.027800000000000000e+04,1.685900000000000000e+04,-2.241900000000000000e+04,-2.252000000000000000e+04,2.331600000000000000e+04
			
 
				+-1.264400000000000000e+04,-2.134100000000000000e+04,1.293200000000000000e+04,2.727100000000000000e+04,-2.945900000000000000e+04,1.700200000000000000e+04,6.470000000000000000e+02,3.064400000000000000e+04
			
 
				+1.513900000000000000e+04,2.465300000000000000e+04,-1.088700000000000000e+04,6.420000000000000000e+02,-2.301800000000000000e+04,3.393000000000000000e+03,2.235400000000000000e+04,3.202000000000000000e+04
			
 
				+2.957900000000000000e+04,1.132500000000000000e+04,1.838100000000000000e+04,-2.000300000000000000e+04,3.111100000000000000e+04,1.260600000000000000e+04,1.929000000000000000e+04,2.790000000000000000e+03
			
 
				+1.049400000000000000e+04,-1.174000000000000000e+03,-1.169200000000000000e+04,2.456200000000000000e+04,-2.573200000000000000e+04,2.158900000000000000e+04,-3.006300000000000000e+04,1.902000000000000000e+04
			
 
				+1.608000000000000000e+03,-6.749000000000000000e+03,-1.782500000000000000e+04,1.012400000000000000e+04,2.352600000000000000e+04,7.341000000000000000e+03,-6.651000000000000000e+03,-1.250000000000000000e+03
			
 
				+-4.309000000000000000e+03,1.624800000000000000e+04,9.321000000000000000e+03,2.341700000000000000e+04,1.869200000000000000e+04,-4.153000000000000000e+03,1.344700000000000000e+04,2.776000000000000000e+04
			
 
				+-3.234300000000000000e+04,7.500000000000000000e+01,-1.304000000000000000e+03,-1.849200000000000000e+04,-2.443300000000000000e+04,-2.503200000000000000e+04,2.108200000000000000e+04,-6.482000000000000000e+03
			
 
				+7.326000000000000000e+03,2.392400000000000000e+04,-1.593300000000000000e+04,-2.659000000000000000e+03,1.601000000000000000e+04,2.337900000000000000e+04,2.083900000000000000e+04,-2.054400000000000000e+04
			
 
				+-1.845800000000000000e+04,-9.760000000000000000e+03,-1.228600000000000000e+04,-1.962300000000000000e+04,-2.227800000000000000e+04,8.300000000000000000e+02,2.010000000000000000e+02,-1.896000000000000000e+03
			
 
				+-1.030300000000000000e+04,2.078000000000000000e+03,3.044000000000000000e+04,-3.203700000000000000e+04,3.078800000000000000e+04,-3.003000000000000000e+03,-2.792900000000000000e+04,9.405000000000000000e+03
			
 
				+1.177800000000000000e+04,-1.835300000000000000e+04,-2.124200000000000000e+04,-2.996000000000000000e+04,-1.238600000000000000e+04,-2.978500000000000000e+04,-1.717900000000000000e+04,-4.808000000000000000e+03
			
 
				+1.162400000000000000e+04,-1.508800000000000000e+04,4.166000000000000000e+03,2.856200000000000000e+04,-2.787000000000000000e+04,2.766000000000000000e+04,1.706400000000000000e+04,-1.029800000000000000e+04
			
 
				+1.238500000000000000e+04,8.103000000000000000e+03,7.981000000000000000e+03,-2.044700000000000000e+04,-1.109400000000000000e+04,1.143700000000000000e+04,1.407000000000000000e+04,1.775500000000000000e+04
			
 
				+-2.494000000000000000e+04,-3.160800000000000000e+04,3.102900000000000000e+04,1.169800000000000000e+04,-2.776600000000000000e+04,3.006800000000000000e+04,-1.122100000000000000e+04,-2.901700000000000000e+04
			
--- a/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/kernel.txt
+++ b/CMSIS/NN/Tests/UnitTest/PregeneratedData/dw_int16xint8_fast_test_bias/kernel.txt
@@ -0,0 +1,33 @@
 
				+# 2,2,8,1
			
 
				+1.714000000000000000e+03
			
 
				+-2.184900000000000000e+04
			
 
				+-2.476100000000000000e+04
			
 
				+-1.361000000000000000e+03
			
 
				+-1.957400000000000000e+04
			
 
				+-3.218400000000000000e+04
			
 
				+-6.840000000000000000e+02
			
 
				+-3.275000000000000000e+03
			
 
				+2.633100000000000000e+04
			
 
				+-2.470000000000000000e+02
			
 
				+-2.135600000000000000e+04
			
 
				+1.088100000000000000e+04
			
 
				+1.304900000000000000e+04
			
 
				+-6.089000000000000000e+03
			
 
				+2.986800000000000000e+04
			
 
				+-3.164000000000000000e+03
			
 
				+-3.136700000000000000e+04
			
 
				+-2.364400000000000000e+04
			
 
				+-2.861400000000000000e+04
			
 
				+1.060000000000000000e+03
			
 
				+-5.750000000000000000e+02
			
 
				+-1.243600000000000000e+04
			
 
				+-5.382000000000000000e+03
			
 
				+-2.179700000000000000e+04
			
 
				+1.570000000000000000e+02
			
 
				+2.756000000000000000e+03
			
 
				+-2.026600000000000000e+04
			
 
				+-8.348000000000000000e+03
			
 
				+-2.431200000000000000e+04
			
 
				+2.118700000000000000e+04
			
 
				+3.119300000000000000e+04
			
 
				+-6.459000000000000000e+03
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/biases_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_multiple_batches_uneven_buffers_biases[8] =
			
 
				+    {-2863147, 3727567, 647963, 1377393, -795891, 3272794, 2461992, 3993595};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_CH 8
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_IN_CH 8
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_W 5
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_H 5
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DST_SIZE 216
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_SIZE 200
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_ACTIVATION_MIN -17000
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_ACTIVATION_MAX 32767
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_BATCHES 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_FILTER_X 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_FILTER_Y 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_STRIDE_X 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_STRIDE_Y 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_PAD_Y 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_W 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_H 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/input_data.h
@@ -0,0 +1,48 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_multiple_batches_uneven_buffers_input[600] = {
			
 
				+    4514,   -6264,  22437,  -30081, -32731, 23333,  -6303,  -28718, -23436, -19999, -12455, -31875, -9662,  28476,
			
 
				+    20182,  14105,  -12515, 25491,  17781,  -15185, -19187, -11760, -12623, -9167,  4279,   32628,  -25580, 6303,
			
 
				+    -26209, -28946, 2036,   8196,   17444,  4500,   3847,   -16039, 28642,  -8731,  22147,  -794,   18883,  10889,
			
 
				+    12,     -6815,  -829,   -17217, 8164,   19283,  -21613, 17538,  21293,  -5966,  -27753, 9093,   13487,  -4789,
			
 
				+    6998,   27037,  17152,  -24326, 10389,  -23716, 21672,  -12884, -26884, 12992,  -16245, 11014,  29250,  25922,
			
 
				+    3946,   -24343, 19052,  12153,  -25022, 4532,   19523,  18166,  -5593,  -6730,  -25788, 12771,  708,    -20099,
			
 
				+    22645,  11588,  -16490, -7065,  -12015, 15634,  30526,  28277,  24752,  27168,  11921,  14298,  32338,  31185,
			
 
				+    -26667, -21056, -30282, -6235,  -12590, 2642,   -16833, 4775,   -16080, 18708,  3449,   12126,  26941,  29131,
			
 
				+    24950,  -12991, 13927,  -30112, -22976, 19872,  15633,  15482,  8058,   -11582, 1227,   28262,  21935,  9733,
			
 
				+    -28713, 4935,   -25762, -23973, 13208,  -7388,  13598,  17013,  19550,  27131,  -11619, 8183,   31959,  -18481,
			
 
				+    -14343, 30288,  -27974, -1886,  12533,  14975,  6799,   32063,  23120,  18553,  6933,   -12000, 16429,  -28181,
			
 
				+    11131,  7045,   8840,   -10511, -21404, 30262,  10241,  -20465, -15976, -4,     -22973, -930,   32038,  1851,
			
 
				+    -32406, 6362,   -12185, -26227, 31353,  -17698, 20273,  18172,  -14618, -9049,  26638,  31722,  45,     -8207,
			
 
				+    -7542,  30256,  3977,   -3803,  -6135,  -6511,  -7829,  4396,   -28361, 23249,  2127,   -19569, 15107,  -21679,
			
 
				+    32234,  8862,   15172,  20597,  2222,   31781,  -16724, -13023, 8696,   5212,   -30365, 7750,   7714,   -4491,
			
 
				+    16464,  21014,  -21458, -31910, 7383,   -30106, 27007,  354,    -28719, -21813, 2960,   22159,  10619,  30990,
			
 
				+    -21772, -17183, -25272, 3563,   4941,   12123,  2685,   -23432, -27647, 23780,  17419,  -24723, -27121, -4803,
			
 
				+    -483,   1208,   -13475, -17251, 19420,  -7826,  17066,  7540,   22448,  2667,   -14807, 8150,   -26884, -7742,
			
 
				+    11747,  -30398, -13370, 18653,  23167,  30578,  10844,  29101,  9598,   31462,  -5056,  24038,  -8900,  -32158,
			
 
				+    -24679, -23664, 7600,   -11484, 10454,  23724,  17775,  2612,   -13031, -5835,  20910,  -30789, -3334,  5527,
			
 
				+    -16826, -23394, -2831,  -31304, 17841,  8784,   4300,   -24155, 2094,   3747,   25420,  16977,  20779,  15670,
			
 
				+    21855,  -26159, -23941, -27784, -254,   -5185,  -9459,  -25730, 16344,  10418,  -22875, 15273,  -32630, -17483,
			
 
				+    -22538, -21422, -8329,  10488,  11165,  -2564,  5247,   28814,  3213,   -23692, -30407, 1849,   -18588, -22198,
			
 
				+    -7200,  -6210,  -14329, 13737,  -3413,  -24955, 14352,  -12349, -3199,  23146,  -25870, 27947,  14498,  1671,
			
 
				+    -15063, 1331,   -7660,  1660,   -14907, -17594, 28781,  19170,  -26972, 13941,  27888,  -20807, 8100,   20663,
			
 
				+    4881,   -4664,  24648,  -15348, 26988,  3764,   7064,   -19698, 28662,  4967,   -31447, -3990,  1319,   10216,
			
 
				+    -2404,  5452,   -6171,  16105,  28784,  -23540, -6038,  -20267, 6141,   22688,  -18637, 16497,  -25589, -26877,
			
 
				+    8084,   2963,   12956,  -3018,  -13463, -25959, 7863,   -31553, 7655,   -22544, -23891, -22076, -5237,  30994,
			
 
				+    24142,  21414,  -20809, -17923, -917,   -28987, -6960,  -1285,  14088,  18731,  6776,   -5997,  -320,   13532,
			
 
				+    7767,   31781,  -8852,  5386,   21099,  -2248,  -21580, -2943,  18782,  -27158, -1761,  -30156, 22769,  -21249,
			
 
				+    24524,  31175,  -20822, -9223,  6151,   -15552, 24756,  23795,  4022,   19853,  -9712,  30430,  -12391, -4296,
			
 
				+    -8929,  21537,  14796,  27142,  -10424, -16581, -3041,  -13544, -26992, -18890, -22462, -6774,  11628,  1018,
			
 
				+    -25621, 18353,  -17908, -19922, 26285,  10790,  24655,  22802,  9859,   15660,  20643,  26268,  -23524, -23895,
			
 
				+    -11388, -11698, 22408,  -29754, -10488, -30398, 13225,  10755,  -26316, 23102,  27311,  30659,  -32045, -2260,
			
 
				+    -11195, 23198,  20894,  9002,   22270,  12538,  25690,  12697,  -17387, 31740,  -7549,  -2576,  -10840, 14290,
			
 
				+    15773,  12220,  -9868,  -30531, 1334,   8932,   10431,  28074,  15104,  -4137,  18301,  32552,  -4026,  -19388,
			
 
				+    -11370, 30801,  -21928, -3470,  24812,  9177,   -20412, -30332, -2986,  -31258, -11170, 20691,  16482,  32478,
			
 
				+    -6432,  20833,  5603,   -7879,  16721,  31311,  4882,   -16843, 28349,  26403,  1821,   17745,  15941,  -32241,
			
 
				+    -31464, 31072,  -882,   -7897,  16385,  -674,   -16858, -13992, -17240, -30765, 25568,  22085,  -18877, -15025,
			
 
				+    -25175, -4546,  10941,  -11285, -27707, 12754,  -15004, -6357,  10336,  -21846, 25039,  -12294, 29553,  18892,
			
 
				+    3053,   14698,  32404,  -25825, -2887,  19564,  -23601, 12855,  -11674, -4663,  19532,  -11157, 15800,  17225,
			
 
				+    28701,  10200,  26472,  -15691, 13591,  -2531,  14006,  -18312, 27497,  11402,  27889,  -23348, -12688, -28639,
			
 
				+    29653,  4195,   27327,  -12486, 20890,  5154,   23824,  17032,  26154,  16863,  31596,  18617};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_mult_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_multiple_batches_uneven_buffers_output_mult[8] =
			
 
				+    {1554034767, 1505895962, 1583401714, 1494720058, 1472755273, 1241592497, 1349384561, 1553647651};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_ref_data.h
@@ -0,0 +1,19 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_multiple_batches_uneven_buffers_output_ref[216] = {
			
 
				+    -568,   772,   -1403, 13165, -9121, 4293,  -1850, 9930,  2107,   -1542, 5116,  -4174, -866,  -4346, 6988,  15155,
			
 
				+    -3608,  -942,  -36,   7705,  -2565, 6334,  -238,  19493, 4860,   9163,  -6674, 10025, -9959, 597,   864,   10148,
			
 
				+    -3263,  4730,  9571,  -6187, 4213,  -710,  -1252, 123,   -2355,  9267,  -6429, 14192, 473,   4450,  2632,  -4260,
			
 
				+    14614,  11928, 6422,  -6437, -600,  -1051, 10500, 7347,  2969,   2477,  -773,  989,   -9584, -981,  -2685, 11460,
			
 
				+    -3594,  10947, -8858, 4277,  7250,  1390,  1504,  11834, 7273,   13839, 15247, -3489, -9153, 3266,  -995,  -12885,
			
 
				+    -6326,  4127,  -2876, 12542, -5994, 8896,  4732,  472,   -3883,  13747, -3800, -7439, 3500,  792,   3022,  -341,
			
 
				+    7108,   14083, -6998, 12815, -383,  3831,  8668,  10743, 1396,   -5059, 5149,  -929,  5182,  1989,  2713,  8745,
			
 
				+    3535,   12276, -626,  -6494, 1218,  7325,  10026, 5821,  -317,   7016,  3550,  2645,  1713,  -394,  -6310, 2254,
			
 
				+    -5464,  22817, 5564,  4701,  -7853, -4662, 2489,  697,   8897,   6466,  -7167, 180,   -1425, 7021,  2245,  8549,
			
 
				+    -5591,  -8521, -5773, -3925, 3446,  1732,  6356,  3144,  1375,   -2206, -1414, 15335, 5451,  7415,  7940,  -9076,
			
 
				+    -8942,  555,   7867,  -6363, -5179, 1817,  -2758, 509,   -3065,  2414,  -3456, 5141,  1712,  1068,  10512, 13037,
			
 
				+    1309,   -2115, 7790,  -3325, 3501,  2769,  13190, 15204, -2078,  19354, 7206,  1672,  6171,  3043,  2681,  15449,
			
 
				+    -8469,  9272,  -6826, -5968, 1125,  4458,  25,    6598,  -13947, 14310, 9582,  3401,  1542,  1838,  16452, 3780,
			
 
				+    -16248, 2185,  -7568, 3032,  -2282, 9617,  4851,  -7862};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/output_shift_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_multiple_batches_uneven_buffers_output_shift[8] = {-9, -9, -9, -9, -9, -9, -9, -9};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/weights_data.h
@@ -0,0 +1,9 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_multiple_batches_uneven_buffers_weights[72] = {
			
 
				+    -127, 36,  -22,  -31,  -45,  -71, 127, 29,   -91, -95, 127,  -79,  32,   3,    -33,  66,  88,   112,
			
 
				+    -107, 46,  -21,  48,   -115, -70, -58, 1,    61,  -43, -3,   -127, -60,  -77,  -91,  6,   -81,  127,
			
 
				+    -18,  -33, -2,   -122, 6,    -39, -3,  -107, -3,  -36, -113, -120, -121, -116, -118, -18, -107, -75,
			
 
				+    49,   45,  -119, -84,  28,   76,  -72, 26,   63,  127, -107, -127, -126, -43,  127,  -14, 74,   57};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/biases_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_biases[8] = {0, 0, 0, 0, 0, 0, 0, 0};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_CH 8
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_IN_CH 8
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_W 4
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DST_SIZE 144
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_SIZE 128
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_ACTIVATION_MIN -17000
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_ACTIVATION_MAX 32767
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_BATCHES 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_FILTER_X 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_FILTER_Y 2
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_STRIDE_X 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_STRIDE_Y 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_PAD_Y 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_W 2
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_H 3
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/input_data.h
@@ -0,0 +1,33 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_input[384] = {
			
 
				+    15228,  9887,   -14372, -21871, 3624,   19404,  -24752, 6792,   2304,   11364,  -17904, 27312,  3347,   2787,
			
 
				+    1239,   -19334, -18736, -29873, 4513,   -22520, 20635,  -25069, -20506, 32604,  8989,   -24370, -12386, 22942,
			
 
				+    -23928, -17316, -13539, 565,    -10994, 18020,  15848,  2587,   -25653, -11289, 21401,  8156,   10604,  -11784,
			
 
				+    6412,   17804,  -24579, -11870, 30884,  -15700, -24481, 9393,   -1179,  18935,  27416,  28212,  21248,  -25977,
			
 
				+    15532,  -22273, 1722,   24671,  -11287, 19818,  -16815, 7134,   29821,  28517,  26126,  31069,  2232,   11798,
			
 
				+    -31387, -30930, -31934, -13164, 4106,   31776,  1665,   3496,   5557,   -6483,  2857,   -9361,  22455,  1088,
			
 
				+    -11854, -32395, -31934, -6768,  2718,   18221,  -17588, -4744,  18785,  17680,  2941,   -566,   32123,  -9715,
			
 
				+    -28024, 22297,  -13894, 23195,  -29692, -24999, 3389,   -16523, 2056,   8969,   24372,  -29406, 19406,  5839,
			
 
				+    -10687, 11846,  -9750,  3677,   31289,  4790,   20334,  3979,   5898,   24649,  16815,  -261,   -10152, -4572,
			
 
				+    -31033, 30508,  12510,  24356,  16131,  -7928,  -4224,  -14176, 1498,   -20976, -26022, 5688,   19329,  8434,
			
 
				+    -3564,  182,    -30456, -3243,  -13439, 2203,   -32659, 26996,  -27508, -14286, -19281, -9613,  5932,   -5671,
			
 
				+    -1911,  -21057, 16671,  -24103, -5186,  7524,   -28100, -10892, -20598, -15598, -15875, -30880, -13841, -28827,
			
 
				+    -14333, 27181,  30035,  -3785,  31312,  -2263,  4563,   23230,  -12019, -25962, 28245,  -16165, 4083,   -14629,
			
 
				+    -8474,  -25761, 629,    -23075, -16496, 7769,   -18652, 26470,  13645,  9974,   -16495, 29217,  5501,   -9689,
			
 
				+    -29066, -22504, -4127,  15462,  7312,   26832,  -2886,  21641,  -30053, -5761,  -15461, -24739, 15177,  -26362,
			
 
				+    -29655, -14925, -19176, -11009, -10780, -1755,  25994,  8118,   25888,  -10223, -3459,  4649,   31277,  -24723,
			
 
				+    -23733, 19759,  10713,  -14879, -16563, 26750,  -31696, 17467,  6851,   32614,  2414,   -14640, 1449,   31562,
			
 
				+    2416,   -6229,  -30244, 10872,  -23742, 13546,  -5255,  -988,   -29111, 31777,  -14270, -23422, -30479, 7836,
			
 
				+    -24788, 27438,  16712,  5935,   -82,    2890,   32542,  -8496,  -30498, -30608, 8851,   6119,   30742,  18115,
			
 
				+    -19028, -3732,  -14902, -17744, -2804,  -6760,  31158,  -8253,  7319,   -11690, 17008,  15371,  3112,   32481,
			
 
				+    634,    30297,  30724,  6967,   -25357, -28063, 20029,  -16954, 7477,   16026,  14921,  29601,  11525,  -13683,
			
 
				+    -439,   -28034, -12736, 6472,   -6050,  2662,   -25105, -10328, -19314, 2402,   25143,  -19596, -8183,  21806,
			
 
				+    8676,   -12408, -23692, -21264, -15540, -16548, -32444, 17192,  -21254, 30309,  13083,  17425,  30594,  -25287,
			
 
				+    -9130,  25737,  -14862, -8582,  -1521,  27627,  26486,  -19678, 28299,  -17489, 19769,  -763,   15311,  14401,
			
 
				+    -31079, 17747,  5782,   -21421, -9977,  -2059,  17686,  29046,  5543,   -10583, -7955,  -309,   7523,   -19843,
			
 
				+    -16418, 1120,   3568,   14529,  10457,  -19391, -24139, -3775,  2893,   -12168, -32286, -15386, -27955, 6214,
			
 
				+    22963,  7639,   8214,   -27765, 754,    -25885, -22424, 25549,  -31498, -17183, -2830,  20309,  6080,   -13750,
			
 
				+    2449,   -28756, -23493, -22822, 23418,  21320};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_mult_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_mult[8] =
			
 
				+    {1321976810, 1963386025, 1097671686, 1316291179, 1344377358, 1921833836, 1832060491, 1383450161};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_ref_data.h
@@ -0,0 +1,15 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_ref[144] = {
			
 
				+    9898,   -12494, 20130,  -14150, 24454,  14940,  1320,   32767,  7637,  4416,   6792,  10113,  -9105,  2676,  565,
			
 
				+    -17000, 12809,  2445,   2306,   7232,   4457,   -10952, 16570,  -9537, 7163,   -5208, 2130,   15009,  27017, 4309,
			
 
				+    8751,   6882,   -3744,  2048,   -17000, 16478,  -14428, 24242,  -1216, -16981, -6438, 3848,   -12394, 8126,  -17000,
			
 
				+    -1933,  -1974,  -8907,  26527,  -1189,  -17000, 23973,  -17000, 1327,  -4191,  -1999, 1403,   1502,   17036, 839,
			
 
				+    -16099, 22956,  -17000, 344,    -6628,  -17000, 13221,  -10702, 32767, 6499,   -9820, -17000, -17000, -2337, -17000,
			
 
				+    -14508, 3034,   1294,   -1537,  31893,  -3000,  -17000, -531,   -470,  -526,   -4772, -4614,  5220,   12959, 1882,
			
 
				+    18722,  6328,   7409,   10422,  -17000, -17000, -5017,  -7074,  858,   -17000, 23387, 4023,   -4904,  30751, 1271,
			
 
				+    12768,  13340,  -1802,  8795,   16018,  -7383,  -17000, -17000, -1418, -11483, 14649, -17000, 7342,   2578,  -17000,
			
 
				+    10468,  2186,   12587,  26124,  12626,  -7891,  -10727, 29700,  32767, 14351,  5004,  1106,   3563,   -5246, 4470,
			
 
				+    8394,   16173,  4699,   -17000, -15077, 3166,   11129,  11921,  -11093};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/output_shift_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_shift[8] =
			
 
				+    {-7, -9, -7, -7, -7, -8, -8, -7};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/weights_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_weights[48] = {
			
 
				+    69,   -34,  -108, 96,  -89,  -24,  127, 81,  6,   -125, -65, 77,   84,  -84, 73,  -84,
			
 
				+    -59,  127,  57,   127, 37,   -91,  -16, 127, -70, -102, 127, -102, -44, 25,  -14, 23,
			
 
				+    -127, -105, 4,    21,  -127, -127, 97,  50,  -19, -90,  -25, -11,  1,   52,  20,  -68};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/biases_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_null_bias_biases[8] = {0, 0, 0, 0, 0, 0, 0, 0};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUT_CH 8
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_IN_CH 8
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_INPUT_W 4
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_INPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_DST_SIZE 72
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_INPUT_SIZE 128
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUT_ACTIVATION_MIN -17000
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUT_ACTIVATION_MAX 32767
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_INPUT_BATCHES 1
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_FILTER_X 2
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_FILTER_Y 2
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_STRIDE_X 1
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_STRIDE_Y 1
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_PAD_Y 0
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_W 3
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_H 3
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_NULL_BIAS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/input_data.h
@@ -0,0 +1,15 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_null_bias_input[128] = {
			
 
				+    -18577, 22082,  19015,  9939,   -9865,  -20512, -9998,  -22665, -21290, -19672, 4065,   -10617, 16410,
			
 
				+    31611,  -7291,  21927,  -19420, 476,    -24395, 9624,   27330,  717,    7899,   5117,   24490,  -32507,
			
 
				+    1771,   -9298,  30059,  4552,   4894,   30983,  27868,  -4336,  28377,  -32599, -28795, -18179, 14068,
			
 
				+    10048,  -13372, -17813, 5292,   5836,   27257,  -17730, 22580,  30897,  24837,  31407,  -3356,  -27394,
			
 
				+    23324,  -1121,  30405,  12506,  -5824,  17333,  21316,  -3119,  12142,  -15812, -8362,  13481,  8010,
			
 
				+    -16409, -1384,  -9159,  -17929, -20469, 5666,   7685,   21715,  8290,   -18605, 29087,  24827,  -7327,
			
 
				+    -31818, -24731, 28920,  -6734,  31443,  -2070,  -8030,  2869,   -26982, 15269,  10481,  13582,  -30426,
			
 
				+    4356,   -14758, -11251, 15973,  19675,  -20685, 8198,   -22781, 13469,  -27440, 3354,   -28953, 23398,
			
 
				+    24900,  25792,  -23244, -2801,  -9152,  17702,  21285,  -23864, -11231, -12826, -2178,  -23462, -9906,
			
 
				+    8555,   -6807,  -357,   -23882, -32250, 29466,  18255,  15836,  -29135, -18724, -25743};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_mult_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_null_bias_output_mult[8] =
			
 
				+    {1866128807, 2080095061, 1878397339, 1222262272, 1181868704, 1168789960, 1840511412, 1934724568};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_ref_data.h
@@ -0,0 +1,10 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_null_bias_output_ref[72] = {
			
 
				+    -866,   -9369,  18025,  14936, 8948,  18291,  1225, -12887, 17485,  11598, -1880,  2217,   24936, 25780, 5322,
			
 
				+    4380,   -17000, 23903,  -2885, 13396, 11649,  6528, -9735,  9763,   -599,  2135,   3433,   1923,  25943, -8111,
			
 
				+    -7396,  27277,  -17000, -3610, -2397, -17000, -161, -7144,  9838,   14752, -12791, -2353,  11865, -1897, -17000,
			
 
				+    -8240,  22236,  2458,   -774,  15668, -17000, 1488, -3932,  -17000, 14050, -159,   -17000, 2347,  -8647, 5208,
			
 
				+    -17000, -12409, -17000, -1805, -3957, -17000, 5042, 12495,  4144,   -2766, -10838, 25524};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/output_shift_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_null_bias_output_shift[8] = {-8, -8, -8, -7, -7, -7, -8, -8};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_null_bias/weights_data.h
@@ -0,0 +1,7 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_null_bias_weights[32] = {-37, -32, 63,  13,  -123, 125,  85,  122, -127, -44, 54,
			
 
				+                                                       72,  56,  127, 51,  127,  -127, 86,  127, -127, 122, -111,
			
 
				+                                                       -91, -22, 8,   127, 48,   -25,  127, -47, 127,  -112};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/biases_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_spill_null_bias_biases[5] = {0, 0, 0, 0, 0};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_CH 5
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_IN_CH 5
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_W 4
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DST_SIZE 120
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_SIZE 80
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_ACTIVATION_MIN -30000
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_ACTIVATION_MAX 32767
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_BATCHES 3
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_FILTER_X 3
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_FILTER_Y 3
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_STRIDE_X 2
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_STRIDE_Y 1
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_PAD_Y 1
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_W 2
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/input_data.h
@@ -0,0 +1,23 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_spill_null_bias_input[240] = {
			
 
				+    -7319,  24299,  -27015, 18164,  -24128, 1886,   -6480,  31922,  -32767, 31539,  -19188, -26478, -2564,  27873,
			
 
				+    -5675,  7433,   31159,  -13898, 5334,   15962,  -11900, -16708, 5430,   -9333,  -25570, 25191,  31183,  28798,
			
 
				+    11724,  31452,  -16546, 31812,  -31313, -17968, -3231,  -29414, 8278,   3366,   -19478, 9861,   23600,  31414,
			
 
				+    1224,   -32139, 28262,  -26215, 3336,   17011,  -27273, -20013, -28111, 20012,  6270,   -16949, -4120,  19892,
			
 
				+    12244,  -27414, -25615, 18696,  -23660, -20768, 26922,  3737,   15419,  5561,   -2868,  -16641, 8907,   13550,
			
 
				+    32141,  -17904, -31540, -26793, 25552,  28856,  31576,  6202,   -14888, 23762,  -8198,  9875,   -13126, -16931,
			
 
				+    -9526,  -16055, 26727,  27041,  -12529, 8293,   5669,   6974,   5096,   23173,  7041,   -23140, -18321, 3989,
			
 
				+    20797,  -26139, -19480, 15818,  -3253,  -32401, 14789,  -28215, 28503,  7393,   12325,  7971,   -28896, -21213,
			
 
				+    -21739, 30576,  -10135, 4547,   -1594,  31627,  -7316,  -1917,  31685,  16917,  29387,  17453,  -24030, -28526,
			
 
				+    -25403, 16473,  -31294, -21366, 29989,  24883,  -4928,  27241,  8780,   -1027,  -18472, -19634, 379,    12938,
			
 
				+    -10436, -17286, 24274,  21095,  -11272, 19127,  -29437, 26081,  2487,   3074,   -21592, -30225, 2899,   -11512,
			
 
				+    -26336, -11936, -18925, -21961, -24225, -32092, -7743,  28017,  7708,   3289,   13632,  29825,  -2397,  13927,
			
 
				+    16813,  15465,  2567,   8863,   20106,  23467,  31344,  -587,   13892,  5223,   32138,  21982,  -1310,  2106,
			
 
				+    -14719, 1361,   -6400,  -15697, -25569, 4561,   17766,  13162,  15011,  19040,  29142,  9900,   -5609,  20467,
			
 
				+    14155,  5410,   9317,   20589,  -31716, 29891,  -18122, 11785,  31683,  -6612,  -19522, -31320, 8707,   -12412,
			
 
				+    30525,  -4086,  -18752, 31711,  -10208, 21016,  -7797,  12539,  32075,  -23034, 24830,  -16196, 28507,  30444,
			
 
				+    -7734,  -25719, 22082,  19411,  -29535, -21746, -26612, -21514, -23954, -20844, -28254, 17247,  -29444, 30996,
			
 
				+    -6366,  18489};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_mult_data.h
@@ -0,0 +1,9 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_spill_null_bias_output_mult[5] = {1582236111,
			
 
				+                                                                   1697278459,
			
 
				+                                                                   1584429450,
			
 
				+                                                                   1578476245,
			
 
				+                                                                   1655240699};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_ref_data.h
@@ -0,0 +1,14 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_spill_null_bias_output_ref[120] = {
			
 
				+    14843,  -30000, -4048,  6675,   9088,   -6050,  9733,   -13083, 3811,  5069,  12857,  20067,  -15293, -6180,
			
 
				+    -6156,  8190,   -30000, 11260,  -260,   -3408,  -10865, -23340, 11763, -9607, -30000, 25650,  -12948, -28105,
			
 
				+    -14116, 8150,   -26021, 9738,   -6228,  -17489, 22739,  8742,   10161, 7544,  -12009, -4853,  -6881,  -6210,
			
 
				+    -386,   -9384,  12091,  -11068, -4250,  -14971, 12699,  -9181,  4662,  -6614, -4822,  9865,   -17257, 9171,
			
 
				+    23259,  10158,  13321,  15819,  -23835, 12345,  1636,   13329,  7029,  -6950, -9693,  -9370,  6622,   -8985,
			
 
				+    2815,   9062,   12809,  -3809,  -1580,  -5755,  21524,  -2161,  -2403, -7857, -3335,  -2979,  -355,   7144,
			
 
				+    11953,  8947,   -6468,  8674,   8205,   4131,   -11566, 17473,  3105,  6242,  12932,  10898,  -4314,  -1754,
			
 
				+    11727,  1264,   9262,   -11550, -11538, 32767,  -9274,  -9578,  12356, -4515, 8844,   -17155, 17149,  19417,
			
 
				+    1644,   -4235,  1852,   4599,   6590,   -4039,  6022,   12918};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/output_shift_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_spill_null_bias_output_shift[5] = {-8, -8, -8, -8, -8};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_spill_null_bias/weights_data.h
@@ -0,0 +1,7 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_spill_null_bias_weights[45] = {
			
 
				+    -124, 92, 127, 36, 96, 92,  -94, -57, 66, -127, 127, -52, -11, 23,   43,  -105, -110, 3,   38,  -30, 38,  20,  57,
			
 
				+    58,   81, -88, 34, 13, 127, 72,  97,  21, 109,  63,  63,  95,  -127, -97, -56,  43,   -84, -75, 29,  -69, -126};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/biases_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_stride_null_bias_biases[8] = {0, 0, 0, 0, 0, 0, 0, 0};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_CH 8
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_IN_CH 8
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_W 4
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DST_SIZE 64
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_SIZE 128
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_ACTIVATION_MIN -32768
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_ACTIVATION_MAX 16000
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_BATCHES 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_FILTER_X 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_FILTER_Y 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_STRIDE_X 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_STRIDE_Y 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_PAD_Y 0
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_W 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_H 2
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/input_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_stride_null_bias_input[256] = {
			
 
				+    13363,  -27062, 13685,  -19256, 13386,  -24814, -16641, -8110,  28707,  2327,   4496,   -28017, -9455,  12014,
			
 
				+    2666,   28345,  -22544, 6081,   -21473, -19897, -31912, 24589,  1586,   3373,   29895,  17061,  5695,   -28130,
			
 
				+    20181,  16434,  2913,   11695,  -3875,  -29550, -3478,  -7687,  -25978, -21484, 20305,  27668,  -16821, 10588,
			
 
				+    11624,  4562,   6732,   -24228, -985,   -15476, -11778, 22395,  1981,   -4412,  -25376, 27774,  5225,   -8601,
			
 
				+    -6840,  10110,  -8133,  -2168,  -22628, -31249, -19991, 3760,   12116,  13388,  -18548, 15261,  -28209, 17667,
			
 
				+    -8684,  -8110,  842,    14760,  -26392, -22422, 16303,  -10954, -22988, 6944,   13384,  -8538,  24785,  -7726,
			
 
				+    -32061, -16167, -9307,  21149,  16388,  -8987,  -22389, 23336,  26324,  -7298,  -17595, 32324,  25170,  22512,
			
 
				+    -20320, 22861,  8725,   -5506,  10775,  -32248, 8500,   2927,   11179,  -25472, -19600, -11217, -14638, 13955,
			
 
				+    10356,  26762,  -29159, 22904,  -4691,  -4378,  22744,  16809,  -13861, -24730, -1753,  -14511, -17693, 536,
			
 
				+    13927,  1868,   -9317,  -19828, 5988,   9475,   21101,  30475,  13260,  -281,   -16225, -26708, 20922,  -17110,
			
 
				+    29639,  15847,  29534,  27105,  -24441, -5009,  -1065,  11745,  -15487, 20515,  25651,  23515,  4366,   9555,
			
 
				+    -30672, -9989,  23196,  -13549, -5370,  25387,  1283,   -9193,  12193,  -24875, -12394, -24555, 21193,  2504,
			
 
				+    -13562, 3829,   -9883,  3917,   -9066,  29147,  27922,  11727,  -30588, 11604,  22163,  -3817,  -31168, 29129,
			
 
				+    -19606, 18210,  -6665,  -30774, -1956,  22483,  -21591, 23370,  2919,   -16802, 30963,  26431,  31245,  15148,
			
 
				+    22645,  12593,  1413,   10487,  12879,  -11177, 4104,   -5148,  -12158, -8878,  13007,  16136,  9350,   -6840,
			
 
				+    3851,   -22577, 2656,   -29706, -18987, 32231,  29063,  -16207, 25159,  23242,  32753,  26738,  23732,  -12515,
			
 
				+    15953,  32530,  -24277, -8282,  -4189,  -32459, 25953,  -18485, 17003,  9567,   -4648,  -25883, 601,    -7937,
			
 
				+    -25808, -11793, -1749,  -1667,  17639,  -9424,  -7337,  10549,  30607,  -16174, 725,    9412,   29522,  -1288,
			
 
				+    -31647, 8673,   -31008, 4926};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_mult_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_stride_null_bias_output_mult[8] =
			
 
				+    {1259187472, 1484929796, 1730993539, 1755899937, 1090673129, 1591800876, 1789240575, 1354058059};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_ref_data.h
@@ -0,0 +1,10 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_stride_null_bias_output_ref[64] = {
			
 
				+    6851,   -8524, 1587,  -3803, -556,   10373,  -633,   6595,  11642,  -4598,  -253,  -2753, -12639,
			
 
				+    -18827, -388,  4217,  -9823, -311,   16000,  -13879, -6339, -4188,  -12402, 4556,  1479,  410,
			
 
				+    10538,  9838,  -9279, 8021,  -4391,  9789,   3323,   -3344, -15337, -4658,  -8062, -7318, 16000,
			
 
				+    9947,   13069, 2319,  -1845, -12946, -13451, -10140, 2482,  5172,   -11978, 4807,  -606,  1530,
			
 
				+    1551,   2971,  6140,  3655,  1978,   -2175,  -2535,  16000, -13683, 4954,   4712,  -3521};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/output_shift_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_stride_null_bias_output_shift[8] = {-8, -8, -8, -8, -8, -8, -8, -8};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_stride_null_bias/weights_data.h
@@ -0,0 +1,7 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_stride_null_bias_weights[32] = {-71, 127, -76,  -109, 6,   -127, 88,  -6,   50,  -45, -78,
			
 
				+                                                              127, -72, -55,  127,  127, -95,  -41, -127, -83, 73,  -70,
			
 
				+                                                              48,  -6,  -127, -78,  125, -81,  127, 17,   44,  52};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/biases_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/biases_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int64_t dw_int16xint8_fast_test_bias_biases[8] =
			
 
				+    {-795878, -5334291, 1236759, -5395949, 1406307, -3500813, 1515920, -1889120};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/config_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/config_data.h
@@ -0,0 +1,24 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUT_CH 8
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_IN_CH 8
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_INPUT_W 4
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_INPUT_H 4
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_DST_SIZE 72
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_INPUT_SIZE 128
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUT_ACTIVATION_MIN -17000
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUT_ACTIVATION_MAX 32767
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_INPUT_BATCHES 1
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_FILTER_X 2
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_FILTER_Y 2
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_STRIDE_X 1
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_STRIDE_Y 1
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_PAD_X 0
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_PAD_Y 0
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_W 3
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_H 3
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_CH_MULT 1
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_INPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_OFFSET 0
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_DILATION_X 1
			
 
				+#define DW_INT16XINT8_FAST_TEST_BIAS_DILATION_Y 1
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/input_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/input_data.h
@@ -0,0 +1,15 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_test_bias_input[128] = {
			
 
				+    26211,  4731,   -22859, 11697,  10585,  11943,  -14100, 26914,  12318,  25865,  21309,  -30278, 16859,
			
 
				+    -22419, -22520, 23316,  -12644, -21341, 12932,  27271,  -29459, 17002,  647,    30644,  15139,  24653,
			
 
				+    -10887, 642,    -23018, 3393,   22354,  32020,  29579,  11325,  18381,  -20003, 31111,  12606,  19290,
			
 
				+    2790,   10494,  -1174,  -11692, 24562,  -25732, 21589,  -30063, 19020,  1608,   -6749,  -17825, 10124,
			
 
				+    23526,  7341,   -6651,  -1250,  -4309,  16248,  9321,   23417,  18692,  -4153,  13447,  27760,  -32343,
			
 
				+    75,     -1304,  -18492, -24433, -25032, 21082,  -6482,  7326,   23924,  -15933, -2659,  16010,  23379,
			
 
				+    20839,  -20544, -18458, -9760,  -12286, -19623, -22278, 830,    201,    -1896,  -10303, 2078,   30440,
			
 
				+    -32037, 30788,  -3003,  -27929, 9405,   11778,  -18353, -21242, -29960, -12386, -29785, -17179, -4808,
			
 
				+    11624,  -15088, 4166,   28562,  -27870, 27660,  17064,  -10298, 12385,  8103,   7981,   -20447, -11094,
			
 
				+    11437,  14070,  17755,  -24940, -31608, 31029,  11698,  -27766, 30068,  -11221, -29017};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_mult_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_mult_data.h
@@ -0,0 +1,6 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_test_bias_output_mult[8] =
			
 
				+    {1569849899, 1183330628, 1432068229, 1089140565, 1216762566, 1610738992, 1561141528, 1090892326};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_ref_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_ref_data.h
@@ -0,0 +1,10 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q15_t dw_int16xint8_fast_test_bias_output_ref[72] = {
			
 
				+    -8682,  -15863, 1159,  -11937, 10286,  -9621,  -15424, -7734,  -9691, -17000, 1962,   -2115,  -11569, -4403,  4200,
			
 
				+    -10421, 1505,   -3791, 5922,   -7826,  888,    -17000, 17309,  -7826, 13275,  -13579, 5015,   -1902,  -12121, -7118,
			
 
				+    -76,    -1351,  -4281, -17000, 19135,  -2607,  18618,  -17000, 1027,  899,    3107,   -7082,  2951,   285,    -7891,
			
 
				+    -13555, -1065,  -4924, -4919,  -7022,  13589,  -8487,  18954,  8497,  18572,  -782,   -11946, -13100, 7578,   -5467,
			
 
				+    -551,   -17000, 8205,  -1650,  -10352, -12264, -10706, -10431, 20654, -4597,  -10298, -6300};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_shift_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/output_shift_data.h
@@ -0,0 +1,5 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const int32_t dw_int16xint8_fast_test_bias_output_shift[8] = {-8, -8, -8, -9, -8, -8, -8, -8};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/test_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/test_data.h
@@ -0,0 +1,8 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#include "biases_data.h"
			
 
				+#include "config_data.h"
			
 
				+#include "input_data.h"
			
 
				+#include "output_mult_data.h"
			
 
				+#include "output_ref_data.h"
			
 
				+#include "output_shift_data.h"
			
 
				+#include "weights_data.h"
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/weights_data.h
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/TestData/dw_int16xint8_fast_test_bias/weights_data.h
@@ -0,0 +1,7 @@
 
				+// Generated by generate_test_data.py using TFL version 2.9.1 as reference.
			
 
				+#pragma once
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+const q7_t dw_int16xint8_fast_test_bias_weights[32] = {7,   -117, -110, -16, -102, -127, -3,   -19,  107, -1, -95,
			
 
				+                                                       127, 68,   -24,  122, -18,  -127, -127, -127, 12,  -3, -49,
			
 
				+                                                       -22, -127, 1,    15,  -90,  -97,  -127, 84,   127, -38};
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_fast_s16/Unity/unity_test_arm_depthwise_conv_fast_s16.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_fast_s16/Unity/unity_test_arm_depthwise_conv_fast_s16.c
@@ -53,3 +53,27 @@ void test_dw_int16xint8_fast_stride_arm_depthwise_conv_fast_s16(void)
 
				 {
			
 
				     dw_int16xint8_fast_stride_arm_depthwise_conv_fast_s16();
			
 
				 }
			
 
				+void test_dw_int16xint8_fast_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_null_bias_arm_depthwise_conv_fast_s16();
			
 
				+}
			
 
				+void test_dw_int16xint8_fast_stride_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_stride_null_bias_arm_depthwise_conv_fast_s16();
			
 
				+}
			
 
				+void test_dw_int16xint8_fast_spill_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_spill_null_bias_arm_depthwise_conv_fast_s16();
			
 
				+}
			
 
				+void test_dw_int16xint8_fast_test_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_test_bias_arm_depthwise_conv_fast_s16();
			
 
				+}
			
 
				+void test_dw_int16xint8_fast_multiple_batches_uneven_buffers_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_multiple_batches_uneven_buffers_arm_depthwise_conv_fast_s16();
			
 
				+}
			
 
				+void test_dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_arm_depthwise_conv_fast_s16();
			
 
				+}
			
--- a/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_fast_s16/test_arm_depthwise_conv_fast_s16.c
+++ b/CMSIS/NN/Tests/UnitTest/TestCases/test_arm_depthwise_conv_fast_s16/test_arm_depthwise_conv_fast_s16.c
@@ -20,10 +20,30 @@
 
				 #include <unity.h>
			
 
				 
			
 
				 #include "../TestData/dw_int16xint8_fast/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_null_bias/test_data.h"
			
 
				 #include "../TestData/dw_int16xint8_fast_spill/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_spill_null_bias/test_data.h"
			
 
				 #include "../TestData/dw_int16xint8_fast_stride/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_stride_null_bias/test_data.h"
			
 
				+#include "../TestData/dw_int16xint8_fast_test_bias/test_data.h"
			
 
				 #include "../Utils/validate.h"
			
 
				 
			
 
				+const int64_t *get_bias_s64_address(const int64_t *bias, int32_t size)
			
 
				+{
			
 
				+    const int64_t *return_bias = NULL;
			
 
				+    for (int i = 0; i < size; i++)
			
 
				+    {
			
 
				+        if (bias[i] != 0)
			
 
				+        {
			
 
				+            return_bias = bias;
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+    return return_bias;
			
 
				+}
			
 
				+
			
 
				 void dw_int16xint8_fast_arm_depthwise_conv_fast_s16(void)
			
 
				 {
			
 
				     const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
@@ -37,7 +57,7 @@ void dw_int16xint8_fast_arm_depthwise_conv_fast_s16(void)
 
				     cmsis_nn_dims bias_dims;
			
 
				     cmsis_nn_dims output_dims;
			
 
				 
			
 
				-    const q63_t *bias_data = dw_int16xint8_fast_biases;
			
 
				+    const q63_t *bias_data = get_bias_s64_address(dw_int16xint8_fast_biases, DW_INT16XINT8_FAST_OUT_CH);
			
 
				     const q15_t *input_data = dw_int16xint8_fast_input;
			
 
				     const q7_t *kernel_data = dw_int16xint8_fast_weights;
			
 
				     const q15_t *output_ref = dw_int16xint8_fast_output_ref;
			
@@ -121,7 +141,7 @@ void dw_int16xint8_fast_spill_arm_depthwise_conv_fast_s16(void)
 
				     cmsis_nn_dims bias_dims;
			
 
				     cmsis_nn_dims output_dims;
			
 
				 
			
 
				-    const q63_t *bias_data = dw_int16xint8_fast_spill_biases;
			
 
				+    const q63_t *bias_data = get_bias_s64_address(dw_int16xint8_fast_spill_biases, DW_INT16XINT8_FAST_SPILL_OUT_CH);
			
 
				     const q15_t *input_data = dw_int16xint8_fast_spill_input;
			
 
				     const q7_t *kernel_data = dw_int16xint8_fast_spill_weights;
			
 
				     const q15_t *output_ref = dw_int16xint8_fast_spill_output_ref;
			
@@ -205,7 +225,7 @@ void dw_int16xint8_fast_stride_arm_depthwise_conv_fast_s16(void)
 
				     cmsis_nn_dims bias_dims;
			
 
				     cmsis_nn_dims output_dims;
			
 
				 
			
 
				-    const q63_t *bias_data = dw_int16xint8_fast_stride_biases;
			
 
				+    const q63_t *bias_data = get_bias_s64_address(dw_int16xint8_fast_stride_biases, DW_INT16XINT8_FAST_STRIDE_OUT_CH);
			
 
				     const q15_t *input_data = dw_int16xint8_fast_stride_input;
			
 
				     const q7_t *kernel_data = dw_int16xint8_fast_stride_weights;
			
 
				     const q15_t *output_ref = dw_int16xint8_fast_stride_output_ref;
			
@@ -275,3 +295,513 @@ void dw_int16xint8_fast_stride_arm_depthwise_conv_fast_s16(void)
 
				     TEST_ASSERT_EQUAL(expected, result);
			
 
				     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				 }
			
 
				+
			
 
				+void dw_int16xint8_fast_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_NULL_BIAS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data =
			
 
				+        get_bias_s64_address(dw_int16xint8_fast_null_bias_biases, DW_INT16XINT8_FAST_NULL_BIAS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_null_bias_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_null_bias_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_null_bias_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_NULL_BIAS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_NULL_BIAS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_NULL_BIAS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_NULL_BIAS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_NULL_BIAS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_NULL_BIAS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_NULL_BIAS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_NULL_BIAS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_NULL_BIAS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_NULL_BIAS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_NULL_BIAS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_NULL_BIAS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_NULL_BIAS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_NULL_BIAS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_NULL_BIAS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_NULL_BIAS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_NULL_BIAS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_NULL_BIAS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_NULL_BIAS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_null_bias_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_null_bias_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
 
				+
			
 
				+void dw_int16xint8_fast_stride_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data =
			
 
				+        get_bias_s64_address(dw_int16xint8_fast_stride_null_bias_biases, DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_stride_null_bias_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_stride_null_bias_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_stride_null_bias_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_STRIDE_NULL_BIAS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_stride_null_bias_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_stride_null_bias_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
 
				+
			
 
				+void dw_int16xint8_fast_spill_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data =
			
 
				+        get_bias_s64_address(dw_int16xint8_fast_spill_null_bias_biases, DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_spill_null_bias_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_spill_null_bias_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_spill_null_bias_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_SPILL_NULL_BIAS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_spill_null_bias_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_spill_null_bias_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
 
				+
			
 
				+void dw_int16xint8_fast_test_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_TEST_BIAS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data =
			
 
				+        get_bias_s64_address(dw_int16xint8_fast_test_bias_biases, DW_INT16XINT8_FAST_TEST_BIAS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_test_bias_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_test_bias_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_test_bias_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_TEST_BIAS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_TEST_BIAS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_TEST_BIAS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_TEST_BIAS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_TEST_BIAS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_TEST_BIAS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_TEST_BIAS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_TEST_BIAS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_TEST_BIAS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_TEST_BIAS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_TEST_BIAS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_TEST_BIAS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_TEST_BIAS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_TEST_BIAS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_TEST_BIAS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_TEST_BIAS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_TEST_BIAS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_TEST_BIAS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_TEST_BIAS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_test_bias_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_test_bias_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
 
				+
			
 
				+void dw_int16xint8_fast_multiple_batches_uneven_buffers_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data = get_bias_s64_address(dw_int16xint8_fast_multiple_batches_uneven_buffers_biases,
			
 
				+                                                  DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_multiple_batches_uneven_buffers_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_multiple_batches_uneven_buffers_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_multiple_batches_uneven_buffers_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_multiple_batches_uneven_buffers_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_multiple_batches_uneven_buffers_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
 
				+
			
 
				+void dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_arm_depthwise_conv_fast_s16(void)
			
 
				+{
			
 
				+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
			
 
				+    q15_t output[DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DST_SIZE] = {0};
			
 
				+
			
 
				+    cmsis_nn_context ctx;
			
 
				+    cmsis_nn_dw_conv_params dw_conv_params;
			
 
				+    cmsis_nn_per_channel_quant_params quant_params;
			
 
				+    cmsis_nn_dims input_dims;
			
 
				+    cmsis_nn_dims filter_dims;
			
 
				+    cmsis_nn_dims bias_dims;
			
 
				+    cmsis_nn_dims output_dims;
			
 
				+
			
 
				+    const q63_t *bias_data = get_bias_s64_address(dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_biases,
			
 
				+                                                  DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_CH);
			
 
				+    const q15_t *input_data = dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_input;
			
 
				+    const q7_t *kernel_data = dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_weights;
			
 
				+    const q15_t *output_ref = dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_ref;
			
 
				+    const int32_t output_ref_size = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DST_SIZE;
			
 
				+
			
 
				+    input_dims.n = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_BATCHES;
			
 
				+    input_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_W;
			
 
				+    input_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_H;
			
 
				+    input_dims.c = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_IN_CH;
			
 
				+    filter_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_FILTER_X;
			
 
				+    filter_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_FILTER_Y;
			
 
				+    output_dims.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_W;
			
 
				+    output_dims.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_H;
			
 
				+    output_dims.c = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_CH;
			
 
				+
			
 
				+    dw_conv_params.padding.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_PAD_X;
			
 
				+    dw_conv_params.padding.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_PAD_Y;
			
 
				+    dw_conv_params.stride.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_STRIDE_X;
			
 
				+    dw_conv_params.stride.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_STRIDE_Y;
			
 
				+    dw_conv_params.dilation.w = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DILATION_X;
			
 
				+    dw_conv_params.dilation.h = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_DILATION_Y;
			
 
				+
			
 
				+    dw_conv_params.ch_mult = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_CH_MULT;
			
 
				+
			
 
				+    dw_conv_params.input_offset = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_INPUT_OFFSET;
			
 
				+    dw_conv_params.output_offset = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUTPUT_OFFSET;
			
 
				+    dw_conv_params.activation.min = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_ACTIVATION_MIN;
			
 
				+    dw_conv_params.activation.max = DW_INT16XINT8_FAST_MULTIPLE_BATCHES_UNEVEN_BUFFERS_NULL_BIAS_OUT_ACTIVATION_MAX;
			
 
				+    quant_params.multiplier = (int32_t *)dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_mult;
			
 
				+    quant_params.shift = (int32_t *)dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_output_shift;
			
 
				+
			
 
				+    int buf_size = arm_depthwise_conv_fast_s16_get_buffer_size(&input_dims, &filter_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    arm_cmsis_nn_status result = arm_depthwise_conv_fast_s16(&ctx,
			
 
				+                                                             &dw_conv_params,
			
 
				+                                                             &quant_params,
			
 
				+                                                             &input_dims,
			
 
				+                                                             input_data,
			
 
				+                                                             &filter_dims,
			
 
				+                                                             kernel_data,
			
 
				+                                                             &bias_dims,
			
 
				+                                                             bias_data,
			
 
				+                                                             &output_dims,
			
 
				+                                                             output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+
			
 
				+    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
			
 
				+    ctx.buf = malloc(buf_size);
			
 
				+
			
 
				+    result = arm_depthwise_conv_wrapper_s16(&ctx,
			
 
				+                                            &dw_conv_params,
			
 
				+                                            &quant_params,
			
 
				+                                            &input_dims,
			
 
				+                                            input_data,
			
 
				+                                            &filter_dims,
			
 
				+                                            kernel_data,
			
 
				+                                            &bias_dims,
			
 
				+                                            bias_data,
			
 
				+                                            &output_dims,
			
 
				+                                            output);
			
 
				+
			
 
				+    free(ctx.buf);
			
 
				+    TEST_ASSERT_EQUAL(expected, result);
			
 
				+    TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
			
 
				+}
			
--- a/CMSIS/NN/Tests/UnitTest/generate_test_data.py
+++ b/CMSIS/NN/Tests/UnitTest/generate_test_data.py
@@ -1298,16 +1298,50 @@ def load_all_testdatasets():
 
				                                           w_y=2, stride_x=1, stride_y=1, pad=False, randmin=INT16_MIN,
			
 
				                                           randmax=INT16_MAX, out_activation_min=-17000,
			
 
				                                           out_activation_max=32767, int16xint8=True)
			
 
				+    dataset = 'dw_int16xint8_fast_multiple_batches_uneven_buffers'
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=8, x_in=5, y_in=5, w_x=3,
			
 
				+                                          w_y=3, stride_x=1, stride_y=1, pad=False, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, out_activation_min=-17000,
			
 
				+                                          out_activation_max=32767, int16xint8=True, batches=3)
			
 
				+    dataset = 'dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias'
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=8, x_in=4, y_in=4, w_x=3,
			
 
				+                                          w_y=2, stride_x=1, stride_y=1, pad=False, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, out_activation_min=-17000,
			
 
				+                                          out_activation_max=32767, int16xint8=True, batches=3, generate_bias=False)
			
 
				+
			
 
				+    dataset = 'dw_int16xint8_fast_test_bias'
			
 
				+    nbr_of_out_channels = 8;
			
 
				+    bias=[i for i in range(nbr_of_out_channels)];
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=nbr_of_out_channels, x_in=4, y_in=4, w_x=2,
			
 
				+                                          w_y=2, stride_x=1, stride_y=1, pad=False, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, out_activation_min=-17000,
			
 
				+                                          out_activation_max=32767, int16xint8=True, generate_bias=bias)
			
 
				+
			
 
				+    dataset = 'dw_int16xint8_fast_null_bias'
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=8, x_in=4, y_in=4, w_x=2,
			
 
				+                                          w_y=2, stride_x=1, stride_y=1, pad=False, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, out_activation_min=-17000,
			
 
				+                                          out_activation_max=32767, int16xint8=True, generate_bias=False)
			
 
				     dataset = 'dw_int16xint8_fast_stride'
			
 
				     TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=8, x_in=4, y_in=4, w_x=2,
			
 
				                                           w_y=2, stride_x=2, stride_y=2, pad=True, randmin=INT16_MIN,
			
 
				                                           randmax=INT16_MAX, batches=2, out_activation_min=INT16_MIN,
			
 
				                                           out_activation_max=16000, int16xint8=True)
			
 
				+    dataset = 'dw_int16xint8_fast_stride_null_bias'
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=8, out_ch=8, x_in=4, y_in=4, w_x=2,
			
 
				+                                          w_y=2, stride_x=2, stride_y=2, pad=True, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, batches=2, out_activation_min=INT16_MIN,
			
 
				+                                          out_activation_max=16000, int16xint8=True, generate_bias=False)
			
 
				     dataset = 'dw_int16xint8_fast_spill'
			
 
				     TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=5, out_ch=5, x_in=4, y_in=4, w_x=3,
			
 
				                                           w_y=3, stride_x=2, stride_y=1, pad=True, randmin=INT16_MIN,
			
 
				                                           randmax=INT16_MAX, batches=3, out_activation_min=-30000,
			
 
				                                           out_activation_max=32767, int16xint8=True)
			
 
				+    dataset = 'dw_int16xint8_fast_spill_null_bias'
			
 
				+    TESTDATA_SETS[dataset] = ConvSettings(dataset, type_of_test, args, in_ch=5, out_ch=5, x_in=4, y_in=4, w_x=3,
			
 
				+                                          w_y=3, stride_x=2, stride_y=1, pad=True, randmin=INT16_MIN,
			
 
				+                                          randmax=INT16_MAX, batches=3, out_activation_min=-30000,
			
 
				+                                          out_activation_max=32767, int16xint8=True, generate_bias=False)
			
 
				 
			
 
				     type_of_test = 'fully_connected'
			
 
				     dataset = 'fully_connected'