Ver Fonte

Conv DSP: Remove ordering of elements in im2col (#52)

RELATED=https://github.com/ARM-software/CMSIS-NN/issues/44
Måns Nilsson há 2 anos atrás
pai
commit
6bf2e7d48d
26 ficheiros alterados com 328 adições e 244 exclusões
  1. 34 9
      Include/arm_nnsupportfunctions.h
  2. 1 0
      README.md
  3. 8 6
      Source/ConvolutionFunctions/arm_convolve_fast_s16.c
  4. 54 76
      Source/ConvolutionFunctions/arm_convolve_s8.c
  5. 3 4
      Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
  6. 10 10
      Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c
  7. 3 2
      Source/NNSupportFunctions/CMakeLists.txt
  8. 4 4
      Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c
  9. 76 0
      Source/NNSupportFunctions/arm_s8_to_s16_unordered_with_offset.c
  10. 3 3
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/biases_data.h
  11. 3 3
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/config_data.h
  12. 65 65
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/input_data.h
  13. 3 3
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_mult_data.h
  14. 9 9
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_ref_data.h
  15. 3 3
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_shift_data.h
  16. 2 2
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/test_data.h
  17. 13 12
      Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/weights_data.h
  18. 3 3
      Tests/UnitTest/TestCases/TestData/conv_out_activation/biases_data.h
  19. 5 5
      Tests/UnitTest/TestCases/TestData/conv_out_activation/config_data.h
  20. 5 4
      Tests/UnitTest/TestCases/TestData/conv_out_activation/input_data.h
  21. 3 3
      Tests/UnitTest/TestCases/TestData/conv_out_activation/output_mult_data.h
  22. 3 3
      Tests/UnitTest/TestCases/TestData/conv_out_activation/output_ref_data.h
  23. 3 3
      Tests/UnitTest/TestCases/TestData/conv_out_activation/output_shift_data.h
  24. 2 2
      Tests/UnitTest/TestCases/TestData/conv_out_activation/test_data.h
  25. 7 5
      Tests/UnitTest/TestCases/TestData/conv_out_activation/weights_data.h
  26. 3 5
      Tests/UnitTest/generate_test_data.py

+ 34 - 9
Include/arm_nnsupportfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        8 March 2023
- * $Revision:    V.15.0.1
+ * $Date:        23 March 2023
+ * $Revision:    V.16.0.0
  *
  * Target :  Arm(R) M-Profile Architecture
  * -------------------------------------------------------------------- */
@@ -122,10 +122,11 @@ union arm_nn_long_long
  * @param[in]    src        pointer to the s8 input vector
  * @param[out]   dst        pointer to the s16 output vector
  * @param[in]    block_size length of the input vector
- * @param[in]    offset     s8 offset to be added to each input vector element.
+ * @param[in]    offset     s16 offset to be added to each input vector element.
  *
  * \par Description:
  *
+ * Output elements are ordered.
  * The equation used for the conversion process is:
  *
  * <pre>
@@ -133,7 +134,33 @@ union arm_nn_long_long
  * </pre>
  *
  */
-void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, uint32_t block_size, int16_t offset);
+void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset);
+
+#if defined(ARM_MATH_DSP)
+/**
+ * @brief Converts the elements from a s8 vector to a s16 vector with an added offset
+ * @param[in]    src        pointer to the s8 input vector
+ * @param[out]   dst        pointer to the s16 output vector
+ * @param[in]    block_size length of the input vector
+ * @param[in]    offset     s16 offset to be added to each input vector element.
+ *
+ * \par Description:
+ *
+ * No additional ordering is done, so the output elements are not in order.
+ * Instead of ABCD, the order will be ACBD.
+ * Note this is for processors with DSP extension only.
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ *  dst[4k + 0] = (int16_t) src[4k + 0] + offset;   0 <= k < block_size / 4.
+ *  dst[4k + 1] = (int16_t) src[4k + 2] + offset;   0 <= k < block_size / 4.
+ *  dst[4k + 2] = (int16_t) src[4k + 1] + offset;   0 <= k < block_size / 4.
+ *  dst[4k + 3] = (int16_t) src[4k + 3] + offset;   0 <= k < block_size / 4.
+ * </pre>
+ *
+ */
+void arm_s8_to_s16_unordered_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset);
+#endif
 
 /**
  * @brief Depthwise conv on an im2col buffer where the input channel equals output channel.
@@ -655,9 +682,8 @@ __STATIC_FORCEINLINE void arm_memset_s8(int8_t *dst, const int8_t val, uint32_t
 #if defined(ARM_MATH_DSP)
 
 /**
- * @brief read and expand one s8 word into two s16 words
+ * @brief read and expand one s8 word into two s16 words with ordering.
  */
-
 __STATIC_FORCEINLINE const int8_t *read_and_pad(const int8_t *source, int32_t *out1, int32_t *out2)
 {
     int32_t inA = arm_nn_read_s8x4_ia(&source);
@@ -676,9 +702,8 @@ __STATIC_FORCEINLINE const int8_t *read_and_pad(const int8_t *source, int32_t *o
 }
 
 /**
- * @brief read and expand one s8 word into two s16 words with reordering
+ * @brief read and expand one s8 word into two s16 words with no additional ordering.
  */
-
 __STATIC_FORCEINLINE const int8_t *read_and_pad_reordered(const int8_t *source, int32_t *out1, int32_t *out2)
 {
     int32_t inA = arm_nn_read_s8x4_ia(&source);
@@ -725,7 +750,7 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
                                       const int32_t out_offset,
                                       const int16_t activation_min,
                                       const int16_t activation_max,
-                                      const uint16_t num_col_a,
+                                      const int32_t num_col_a,
                                       const int32_t *const output_bias,
                                       int8_t *out_0);
 

+ 1 - 0
README.md

@@ -35,6 +35,7 @@ Examples are Cortex-M55 or Cortex-M85 configured with MVE.
 | AvgPooling      | Yes         | Yes        | Yes         | Yes          | Yes         | Yes          |
 | Softmax         | Yes         | Yes        | Yes         | Yes          | Yes         | No           |
 | LSTM            | Yes         | NA         | Yes         | NA           | Yes         | NA           |
+| SVDF            | Yes         | No         | Yes         | No           | Yes         | No           |
 
 
 ## Contribution Guideline

+ 8 - 6
Source/ConvolutionFunctions/arm_convolve_fast_s16.c

@@ -21,8 +21,8 @@
  * Title:        arm_convolve_fast_s16.c
  * Description:  Optimized s16 version of convolution.
  *
- * $Date:        30 January 2023
- * $Revision:    V.2.2.0
+ * $Date:        23 March 2023
+ * $Revision:    V.2.3.0
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -81,6 +81,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
     const int32_t output_x = output_dims->w;
     const int32_t output_y = output_dims->h;
     const int32_t output_ch = output_dims->c;
+    const int32_t rhs_cols = input_ch * kernel_y * kernel_x;
 
     const int32_t pad_x = conv_params->padding.w;
     const int32_t pad_y = conv_params->padding.h;
@@ -124,7 +125,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
                     }
                 }
                 /* Computation is filed for every 2 columns */
-                if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x)
+                if (two_column_buf == buffer_a + 2 * rhs_cols)
                 {
                     out = arm_nn_mat_mult_kernel_s16(filter_data,
                                                      buffer_a,
@@ -133,7 +134,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
                                                      output_mult,
                                                      out_activation_min,
                                                      out_activation_max,
-                                                     (input_ch * kernel_y * kernel_x),
+                                                     rhs_cols,
                                                      bias_data,
                                                      out);
 
@@ -158,7 +159,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
                 const int16_t *ip_as_col = buffer_a;
 
                 /* 4 multiply and accumulates are done in one loop. */
-                uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
+                int32_t col_count = rhs_cols >> 2;
 
                 while (col_count)
                 {
@@ -175,7 +176,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
                     col_count--;
                 }
                 /* Handle left over mac */
-                col_count = input_ch * kernel_y * kernel_x & 0x3;
+                col_count = rhs_cols & 0x3;
                 while (col_count)
                 {
                     int8_t ker_a1 = *ker_a++;
@@ -214,6 +215,7 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
         (void)out_activation_max;
         (void)output_mult;
         (void)output_shift;
+        (void)rhs_cols;
         return ARM_CMSIS_NN_ARG_ERROR;
 #endif
         /* Advance to the next batch */

+ 54 - 76
Source/ConvolutionFunctions/arm_convolve_s8.c

@@ -21,8 +21,8 @@
  * Title:        arm_convolve_s8.c
  * Description:  s8 version of convolution using symmetric quantization.
  *
- * $Date:        8 March 2023
- * $Revision:    V.3.3.1
+ * $Date:        21 March 2023
+ * $Revision:    V.3.4.0
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -47,7 +47,6 @@
  * are multiples of 4 or at least greater than 4.
  *
  */
-
 arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                                     const cmsis_nn_conv_params *conv_params,
                                     const cmsis_nn_per_channel_quant_params *quant_params,
@@ -62,7 +61,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
 {
     (void)bias_dims;
 
-    if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0)
+    if (ctx->buf == NULL)
     {
         return ARM_CMSIS_NN_ARG_ERROR;
     }
@@ -87,21 +86,27 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
     const int32_t out_offset = conv_params->output_offset;
     const int32_t out_activation_min = conv_params->activation.min;
     const int32_t out_activation_max = conv_params->activation.max;
-
+    const int32_t rhs_cols = kernel_x * kernel_y * input_ch;
     const int32_t input_offset = conv_params->input_offset;
+
     int32_t *output_mult = quant_params->multiplier;
     int32_t *output_shift = quant_params->shift;
 
     int i_batch;
     for (i_batch = 0; i_batch < input_batches; i_batch++)
     {
+
 #if defined(ARM_MATH_MVEI)
-        /* Generate upto four columns from the input tensor a GEMM computation */
+        /* Generate up to four columns from the input tensor for a GEMM computation */
         int8_t *im2col_buf = (int8_t *)buffer_a;
+        const int32_t rhs_rows = output_dims->c;
+#else
+        /* Use as a ping-pong buffer for unordered elements */
+        int8_t *im2col_buf = (int8_t *)buffer_a + rhs_cols * 2;
+        int16_t *im2col_buf_start_s16 = buffer_a;
+#endif
         int8_t *out = output_data;
         int32_t lhs_rows = 0;
-        const int32_t rhs_rows = output_dims->c;
-        const int32_t rhs_cols = kernel_x * kernel_y * input_ch;
 
         /* This part implements the im2col function */
         for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
@@ -131,6 +136,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 }
                 lhs_rows++;
 
+#if defined(ARM_MATH_MVEI)
                 /* Computation is filed for every 4 columns */
                 if (lhs_rows == 4)
                 {
@@ -153,15 +159,48 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                     lhs_rows = 0;
                     im2col_buf = (int8_t *)buffer_a;
                 }
+#else
+    #if defined(ARM_MATH_DSP)
+                /* Copy one column with input offset and no ordering */
+                arm_s8_to_s16_unordered_with_offset(
+                    im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+    #else
+                arm_q7_to_q15_with_offset(im2col_buf - rhs_cols, im2col_buf_start_s16, rhs_cols, (int16_t)input_offset);
+    #endif
+                im2col_buf_start_s16 += rhs_cols;
+
+                if (lhs_rows == 2)
+                {
+                    out = arm_nn_mat_mult_kernel_s8_s16(filter_data,
+                                                        buffer_a,
+                                                        output_ch,
+                                                        output_shift,
+                                                        output_mult,
+                                                        out_offset,
+                                                        out_activation_min,
+                                                        out_activation_max,
+                                                        rhs_cols,
+                                                        bias_data,
+                                                        out);
+
+                    /* counter reset */
+                    im2col_buf_start_s16 = buffer_a;
+                    im2col_buf = (int8_t *)buffer_a + rhs_cols * 2;
+                    lhs_rows = 0;
+                }
+#endif
             }
+
             if (out == NULL)
             {
                 return ARM_CMSIS_NN_NO_IMPL_ERROR;
             }
         }
+
         /* Handle left over columns */
         if (lhs_rows != 0)
         {
+#if defined(ARM_MATH_MVEI)
             arm_nn_mat_mult_nt_t_s8((int8_t *)buffer_a,
                                     filter_data,
                                     bias_data,
@@ -179,68 +218,8 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
             out += lhs_rows * rhs_rows;
             lhs_rows = 0;
             im2col_buf = (int8_t *)buffer_a;
-        }
 #else // #if defined(ARM_MATH_MVEI)
-        int32_t i_out_y, i_out_x, i_ker_y, i_ker_x;
-
-        /* Generate two columns from the input tensor a GEMM computation */
-        int16_t *two_column_buf = buffer_a;
-        int8_t *out = output_data;
-
-        /* This part implements the im2col function */
-        for (i_out_y = 0; i_out_y < output_y; i_out_y++)
-        {
-            for (i_out_x = 0; i_out_x < output_x; i_out_x++)
-            {
-                const int32_t base_idx_y = stride_y * i_out_y - pad_y;
-                const int32_t base_idx_x = stride_x * i_out_x - pad_x;
 
-                for (i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
-                {
-                    for (i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
-                    {
-                        const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
-                        const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
-
-                        if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
-                        {
-                            /* Filling 0 for out-of-bound paddings */
-                            memset(two_column_buf, 0, sizeof(int16_t) * input_ch);
-                        }
-                        else
-                        {
-                            /* Copying the pixel data to column */
-                            arm_q7_to_q15_with_offset(
-                                input_data + (k_y * input_x + k_x) * input_ch, two_column_buf, input_ch, input_offset);
-                        }
-                        two_column_buf += input_ch;
-                    }
-                }
-
-                /* Computation is filed for every 2 columns */
-                if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x)
-                {
-                    out = arm_nn_mat_mult_kernel_s8_s16(filter_data,
-                                                        buffer_a,
-                                                        output_ch,
-                                                        output_shift,
-                                                        output_mult,
-                                                        out_offset,
-                                                        out_activation_min,
-                                                        out_activation_max,
-                                                        input_ch * kernel_y * kernel_x,
-                                                        bias_data,
-                                                        out);
-
-                    /* counter reset */
-                    two_column_buf = buffer_a;
-                }
-            }
-        }
-
-        /* left-over because odd number of output pixels */
-        if (two_column_buf != buffer_a)
-        {
             const int8_t *ker_a = filter_data;
             int i;
 
@@ -253,19 +232,17 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                     sum = bias_data[i];
                 }
 
-                /* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
                 const int16_t *ip_as_col = buffer_a;
 
-                /* 4 multiply and accumulates are done in one loop. */
     #if defined(ARM_MATH_DSP)
-                uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
-
+                /* 4 multiply and accumulates are done in one loop. */
+                uint16_t col_count = rhs_cols / 4;
                 while (col_count)
                 {
                     int32_t ker_a1, ker_a2;
                     int32_t ip_b1, ip_b2;
 
-                    ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2);
+                    ker_a = read_and_pad_reordered(ker_a, &ker_a1, &ker_a2);
 
                     ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
                     sum = SMLAD(ker_a1, ip_b1, sum);
@@ -275,9 +252,9 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                     col_count--;
                 }
                 /* Handle left over mac */
-                col_count = input_ch * kernel_y * kernel_x & 0x3;
+                col_count = rhs_cols & 0x3;
     #else
-                uint16_t col_count = input_ch * kernel_y * kernel_x;
+                uint16_t col_count = rhs_cols;
     #endif
                 while (col_count)
                 {
@@ -293,8 +270,9 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
                 sum = MIN(sum, out_activation_max);
                 *out++ = (int8_t)sum;
             }
-        }
 #endif // #if defined(ARM_MATH_MVEI)
+        }
+
         /* Advance to the next batch */
         input_data += (input_x * input_y * input_ch);
         output_data += (output_x * output_y * output_ch);

+ 3 - 4
Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c

@@ -22,8 +22,8 @@
  * Description:  Optimized s8 depthwise separable convolution function for
  *               channel multiplier of 1.
  *
- * $Date:        8 March 2023
- * $Revision:    V.3.4.0
+ * $Date:        22 March 2023
+ * $Revision:    V.3.5.0
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -60,7 +60,6 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
                                               const cmsis_nn_dims *output_dims,
                                               int8_t *output)
 {
-
     const int32_t input_ch = input_dims->c;
     const int32_t output_ch = output_dims->c;
 
@@ -254,7 +253,7 @@ arm_cmsis_nn_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
                         arm_q7_to_q15_with_offset((int8_t *)input + (idx_y * input_x + idx_x) * input_ch,
                                                   &col_buffer[index],
                                                   input_ch,
-                                                  input_offset);
+                                                  (int16_t)input_offset);
                     }
                     index += input_ch;
                 }

+ 10 - 10
Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c

@@ -21,8 +21,8 @@
  * Title:        arm_nn_mat_mult_kernel_s8_s16.c
  * Description:  Matrix-multiplication function for convolution
  *
- * $Date:        5 Januray 2023
- * $Revision:    V.1.2.0
+ * $Date:        23 March 2023
+ * $Revision:    V.1.3.0
  *
  * Target :  Arm(R) M-Profile Architecture
  * -------------------------------------------------------------------- */
@@ -45,7 +45,7 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
                                       const int32_t out_offset,
                                       const int16_t activation_min,
                                       const int16_t activation_max,
-                                      const uint16_t num_col_a,
+                                      const int32_t num_col_a,
                                       const int32_t *const output_bias,
                                       int8_t *out_0)
 {
@@ -80,7 +80,7 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
         }
 
     #if defined(ARM_MATH_DSP)
-        uint16_t col_count = num_col_a / 4;
+        int32_t col_count = num_col_a / 4;
         /* accumulate over the vector */
         while (col_count)
         {
@@ -88,8 +88,8 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
             int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
             int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
 
-            ip_a0 = read_and_pad(ip_a0, &a01, &a02);
-            ip_a1 = read_and_pad(ip_a1, &a11, &a12);
+            ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
+            ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12);
 
             ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
             ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
@@ -108,7 +108,7 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
         } /* while over col_count */
         col_count = num_col_a & 0x3;
     #else
-        uint16_t col_count = num_col_a;
+        int32_t col_count = num_col_a;
     #endif
         while (col_count)
         {
@@ -175,14 +175,14 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
         }
 
     #if defined(ARM_MATH_DSP)
-        uint16_t col_count = num_col_a >> 2;
+        int32_t col_count = num_col_a >> 2;
         while (col_count)
         {
             int32_t a01, a02;
             int32_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
             int32_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
 
-            ip_a0 = read_and_pad(ip_a0, &a01, &a02);
+            ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
 
             ch_0_out_0 = SMLAD(a01, b0, ch_0_out_0);
             ch_0_out_1 = SMLAD(a01, b1, ch_0_out_1);
@@ -196,7 +196,7 @@ int8_t *arm_nn_mat_mult_kernel_s8_s16(const int8_t *input_a,
         }
         col_count = num_col_a & 0x3;
     #else
-        uint16_t col_count = num_col_a;
+        int32_t col_count = num_col_a;
     #endif
         while (col_count)
         {

+ 3 - 2
Source/NNSupportFunctions/CMakeLists.txt

@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2019-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -19,4 +19,5 @@
 file(GLOB SRC "./*_s8*.c")
 file(GLOB SRC_S16 "./*_s16*.c")
 target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16} arm_nntables.c
-                                                  arm_q7_to_q15_with_offset.c)
+  arm_q7_to_q15_with_offset.c
+  arm_s8_to_s16_unordered_with_offset.c)

+ 4 - 4
Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c

@@ -21,8 +21,8 @@
  * Title:        arm_q7_to_q15_with_offset.c
  * Description:  Converts the elements of the Q7 vector to Q15 vector with an added offset
  *
- * $Date:        5 January 2023
- * $Revision:    V.2.1.0
+ * $Date:        22 March 2023
+ * $Revision:    V.2.2.0
  *
  * Target :  Arm(R) M-Profile Architecture
  *
@@ -39,9 +39,9 @@
  * @{
  */
 
-void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, uint32_t block_size, int16_t offset)
+void arm_q7_to_q15_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
 {
-    int block_cnt;
+    int32_t block_cnt;
 
 #if defined(ARM_MATH_MVEI)
 

+ 76 - 0
Source/NNSupportFunctions/arm_s8_to_s16_unordered_with_offset.c

@@ -0,0 +1,76 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_s8_to_s16_unordered_with_offset.c
+ * Description:  Converts the elements of the S8 vector to S16 vector with an added offset
+ *
+ * $Date:        23 March 2023
+ * $Revision:    V.1.0.0
+ *
+ * Target :  Arm(R) M-Profile Architecture
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup supportConversion
+ * @{
+ */
+#if defined(ARM_MATH_DSP)
+void arm_s8_to_s16_unordered_with_offset(const int8_t *src, int16_t *dst, int32_t block_size, int16_t offset)
+{
+    int32_t in_s8x4;    /* four s8 inputs read as one 32-bit word */
+    int32_t in_s16x2_1; /* first pair of widened results */
+    int32_t in_s16x2_2; /* second pair of widened results */
+    int32_t block_cnt = block_size >> 2; /* number of complete groups of four inputs */
+
+    /* Compute 4 outputs at a time.
+     * The offset is packed into one word (presumably into both 16-bit halves, as
+     * the name offset_s16x2 suggests) so it can be added to two results at once. */
+    const int32_t offset_s16x2 = PKHBT(offset, offset, 16);
+    while (block_cnt > 0)
+    {
+        in_s8x4 = arm_nn_read_s8x4_ia(&src); /* src is passed by address; the helper is expected to advance it */
+        /* Widen and add the offset twice: once on the word as read and once on
+         * the word rotated by 8 bits. No re-ordering follows, so each group of
+         * four is written in the documented ACBD order rather than ABCD. */
+        in_s16x2_1 = SXTAB16(offset_s16x2, in_s8x4);
+        in_s16x2_2 = SXTAB16(offset_s16x2, ROR(in_s8x4, 8));
+
+        arm_nn_write_q15x2_ia(&dst, in_s16x2_1);
+        arm_nn_write_q15x2_ia(&dst, in_s16x2_2);
+
+        block_cnt--;
+    }
+
+    /* Handle left over samples: the remaining block_size % 4 elements are
+     * converted one at a time, in their original order. */
+    block_cnt = block_size % 4;
+    while (block_cnt > 0)
+    {
+        *dst++ = (int16_t)*src++ + offset;
+        block_cnt--;
+    }
+}
+#endif
+
+/**
+ * @} end of Doxygen group
+ */

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/biases_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_1_x_n_3_biases[1] = {8349};
+const int32_t conv_1_x_n_3_biases[1] = {26566};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/config_data.h

@@ -1,5 +1,5 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #define CONV_1_X_N_3_OUT_CH 1
 #define CONV_1_X_N_3_IN_CH 4
@@ -19,6 +19,6 @@
 #define CONV_1_X_N_3_OUTPUT_W 148
 #define CONV_1_X_N_3_OUTPUT_H 1
 #define CONV_1_X_N_3_INPUT_OFFSET 128
-#define CONV_1_X_N_3_OUTPUT_OFFSET 19
+#define CONV_1_X_N_3_OUTPUT_OFFSET 74
 #define CONV_1_X_N_3_DILATION_X 1
 #define CONV_1_X_N_3_DILATION_Y 1

+ 65 - 65
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/input_data.h

@@ -1,69 +1,69 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
 const int8_t conv_1_x_n_3_input[1184] = {
-    -4,   -53,  56,   32,   36,   31,   -114, 24,   123,  -14,  -9,   -53,  -109, -13,  -102, 118,  -124, -4,   -87,
-    -108, -79,  111,  126,  83,   -14,  114,  100,  90,   -4,   -25,  -43,  67,   -70,  -54,  86,   -71,  -20,  -110,
-    53,   53,   91,   -123, -64,  -128, 77,   98,   -22,  93,   63,   13,   10,   108,  -88,  64,   -98,  -2,   -107,
-    93,   118,  -13,  -87,  125,  29,   -79,  -106, -90,  -49,  -96,  56,   -29,  105,  44,   70,   105,  -117, 0,
-    35,   -93,  91,   72,   -89,  58,   41,   -19,  -65,  -112, -89,  -47,  -123, -71,  -15,  -115, -106, -41,  -98,
-    -101, -36,  71,   -69,  -100, -26,  53,   -76,  100,  51,   104,  -44,  -37,  80,   -107, 63,   -7,   32,   109,
-    -29,  -91,  -55,  -105, 47,   55,   -71,  -123, -22,  -31,  67,   60,   54,   124,  42,   53,   -108, -6,   66,
-    -118, 126,  -11,  20,   -19,  59,   -108, 95,   -33,  41,   -76,  -88,  -14,  -73,  -32,  -23,  7,    -76,  -48,
-    91,   118,  -102, -72,  112,  87,   108,  -118, -33,  50,   -109, -122, -26,  -70,  -61,  -78,  -81,  -88,  -11,
-    -50,  -86,  114,  -109, -14,  30,   -58,  -92,  -36,  -111, 126,  -68,  -67,  51,   -112, 57,   61,   39,   114,
-    -117, -39,  -113, -61,  -53,  114,  77,   -80,  121,  -124, 65,   96,   -30,  -37,  -68,  67,   88,   -105, -76,
-    -68,  -3,   -93,  105,  90,   -106, 101,  -81,  -23,  -30,  -67,  -49,  74,   -113, -32,  5,    -75,  -110, 43,
-    108,  61,   -91,  73,   26,   -106, -51,  70,   -70,  59,   -27,  -109, 3,    -118, 14,   108,  -95,  94,   115,
-    -7,   110,  113,  -126, 56,   -98,  -33,  46,   115,  -32,  40,   -85,  83,   92,   -118, -13,  -62,  50,   -74,
-    44,   -53,  -106, 24,   102,  14,   78,   -6,   76,   111,  -39,  -125, 45,   111,  50,   -77,  -55,  -117, 105,
-    -67,  -18,  -28,  97,   90,   -92,  -31,  52,   48,   33,   109,  -75,  80,   96,   -27,  -112, 9,    -71,  -17,
-    64,   97,   95,   116,  -36,  72,   40,   -82,  59,   -95,  -32,  65,   120,  115,  -9,   -92,  -101, 119,  25,
-    -116, -114, -111, -94,  1,    -99,  107,  -18,  -1,   -42,  -3,   -12,  65,   -61,  -126, -102, -96,  17,   -3,
-    26,   63,   122,  -102, -59,  -73,  -120, -2,   52,   -81,  -17,  -13,  -124, 98,   94,   100,  -125, 41,   -80,
-    -69,  -33,  24,   119,  -119, 101,  19,   51,   -10,  -53,  -119, 104,  116,  -1,   -101, -41,  -107, 18,   -17,
-    -101, -43,  18,   25,   57,   -37,  32,   99,   36,   -81,  -61,  -60,  -109, 75,   -95,  -51,  25,   49,   -86,
-    -92,  -72,  21,   57,   71,   94,   -18,  20,   -83,  -124, 1,    123,  57,   110,  -88,  -100, 106,  -108, -89,
-    -112, 38,   -82,  -74,  -60,  111,  -39,  39,   -106, 119,  -37,  46,   -70,  -112, -78,  -70,  -83,  9,    89,
-    102,  91,   -123, -71,  29,   -56,  55,   -85,  86,   -29,  -35,  -34,  18,   18,   97,   -98,  -55,  57,   110,
-    121,  94,   -19,  -68,  -67,  44,   -18,  -51,  77,   54,   35,   -6,   -114, -59,  75,   82,   74,   30,   -77,
-    -81,  47,   45,   125,  54,   119,  -78,  22,   109,  124,  119,  -9,   -29,  112,  -66,  90,   -1,   64,   -62,
-    44,   24,   -46,  -94,  -88,  -67,  66,   92,   -121, 55,   12,   -40,  38,   -62,  76,   97,   84,   -58,  -100,
-    -9,   -125, 105,  94,   119,  -38,  -59,  -13,  -26,  99,   -80,  -91,  -32,  87,   -106, 65,   118,  -32,  -83,
-    -99,  -80,  10,   118,  -92,  -115, 22,   -26,  119,  -73,  65,   112,  21,   75,   -120, 20,   45,   58,   80,
-    113,  -30,  -127, -35,  77,   79,   11,   62,   -35,  115,  117,  34,   16,   8,    -97,  16,   -96,  -110, 58,
-    -79,  -77,  -96,  -105, 53,   -89,  6,    -39,  67,   22,   122,  63,   -52,  -68,  8,    104,  65,   38,   -43,
-    97,   102,  96,   113,  95,   4,    7,    -79,  102,  37,   54,   103,  105,  -78,  -11,  109,  -92,  60,   32,
-    -84,  -56,  -13,  -86,  118,  70,   59,   96,   102,  120,  66,   109,  13,   120,  -7,   11,   -7,   -58,  -6,
-    3,    -58,  -86,  35,   91,   -78,  1,    -109, -106, 122,  83,   -34,  -56,  -14,  19,   4,    -65,  23,   90,
-    19,   42,   -101, -7,   100,  -123, -64,  -82,  108,  61,   -85,  50,   -39,  20,   58,   -78,  86,   3,    -28,
-    96,   -27,  126,  -5,   58,   -82,  -86,  -20,  60,   23,   -126, -111, -73,  -6,   -31,  -109, -33,  58,   93,
-    92,   -60,  36,   16,   -101, -41,  5,    31,   74,   -109, -73,  91,   117,  26,   72,   -25,  -126, 119,  -83,
-    81,   65,   -2,   4,    -81,  35,   10,   -105, -40,  104,  62,   -31,  -28,  -36,  11,   -103, -20,  -85,  76,
-    126,  100,  -117, -56,  93,   85,   109,  -91,  -39,  -58,  98,   -128, 120,  37,   -13,  -39,  -125, 91,   50,
-    -59,  -104, 113,  115,  117,  85,   -41,  119,  25,   -84,  44,   -75,  1,    120,  -100, 94,   79,   -51,  -81,
-    5,    -113, 50,   -53,  126,  93,   15,   10,   64,   36,   -122, -64,  -36,  32,   -111, -92,  102,  -88,  12,
-    -37,  -121, 105,  92,   -40,  -35,  -28,  -100, 37,   88,   -31,  -107, -94,  72,   -4,   77,   85,   -127, 42,
-    3,    87,   -91,  109,  47,   -13,  124,  -54,  81,   -71,  22,   42,   -89,  40,   0,    -63,  -22,  -109, -93,
-    85,   -123, -120, -24,  -35,  33,   86,   -124, -105, 44,   -119, 40,   97,   68,   109,  -35,  -31,  3,    -19,
-    53,   114,  -56,  24,   80,   -35,  42,   -22,  -89,  -18,  6,    -11,  15,   17,   11,   -57,  -127, 100,  -74,
-    -112, 6,    25,   32,   -51,  -122, 72,   -99,  -36,  -38,  90,   84,   82,   -69,  37,   -52,  -99,  -93,  -19,
-    98,   -15,  70,   27,   86,   41,   99,   -26,  -43,  104,  125,  80,   -5,   -40,  -50,  -63,  -103, -93,  91,
-    -89,  72,   49,   39,   -32,  -111, 39,   -6,   0,    -94,  52,   -65,  -42,  -17,  -109, 67,   -101, 117,  -90,
-    106,  -68,  46,   -49,  -116, 98,   103,  73,   65,   53,   -48,  -11,  39,   72,   54,   4,    -106, -60,  20,
-    84,   -104, -42,  56,   -102, 50,   -69,  66,   99,   97,   -58,  47,   51,   73,   -118, 93,   104,  121,  2,
-    -126, -90,  50,   -84,  19,   95,   -73,  -100, -79,  -106, -94,  0,    67,   103,  88,   -46,  79,   0,    29,
-    16,   -22,  -104, 77,   -16,  -91,  28,   -37,  -77,  29,   45,   -105, 43,   -43,  -58,  -108, -80,  -109, -72,
-    115,  20,   -59,  111,  67,   -37,  121,  -100, -98,  49,   -117, -3,   99,   -7,   -54,  125,  -2,   -55,  48,
-    -8,   80,   70,   -110, -125, -52,  86,   -48,  55,   125,  72,   -36,  73,   -19,  91,   81,   -39,  -57,  5,
-    114,  64,   63,   -99,  54,   -40,  -53,  47,   21,   -86,  15,   98,   -11,  -116, -15,  113,  41,   79,   90,
-    78,   69,   -71,  114,  104,  -124, -55,  85,   12,   -71,  -3,   98,   -22,  23,   95,   -51,  89,   119,  38,
-    -126, 74,   6,    11,   69,   -113, -5,   -38,  12,   -18,  53,   46,   -118, -121, -46,  -96,  9,    2,    -51,
-    -72,  -58,  2,    123,  -70,  -2,   57,   17,   114,  126,  35,   -64,  66,   105,  58,   -50,  -66,  81,   -37,
-    121,  16,   16,   83,   -53,  16,   63,   53,   105,  -17,  -34,  39,   -94,  -24,  45,   79,   86,   -119, -19,
-    90,   0,    -107, 18,   -54,  93,   -87,  -29,  -57,  70,   -25,  119,  76,   -30,  13,   -50,  -116, -1,   11,
-    91,   25,   -105, -27,  107,  -84,  -9,   -92,  125,  27,   -106, 56,   -57,  -1,   5,    -12,  -51,  17,   -82,
-    -1,   77,   4,    -32,  43,   -58,  -62,  -36,  -74,  -42,  100,  -42,  -48,  65,   9,    9,    121,  -44,  120,
-    -82,  -26,  97,   -111, -3,   52};
+    -18,  -53,  23,   29,   41,   -126, -42,  97,   -126, 26,   59,   102,  108,  -10,  -115, 73,   -33,  -16,  16,
+    25,   68,   79,   -105, -122, -20,  53,   -74,  -15,  80,   42,   84,   -59,  -43,  120,  108,  27,   111,  34,
+    -33,  -74,  -21,  -112, -47,  -51,  35,   113,  -108, 84,   42,   -16,  56,   9,    -100, 41,   -67,  -101, -99,
+    63,   -102, -4,   -21,  -98,  93,   37,   -26,  -109, 61,   -69,  23,   -111, -10,  -128, -103, 25,   15,   92,
+    -34,  -58,  -3,   42,   -36,  65,   104,  78,   -84,  -41,  -109, 41,   13,   -106, 98,   26,   123,  100,  -8,
+    112,  -126, 43,   103,  -102, 71,   33,   -93,  -59,  36,   -75,  -107, -69,  -63,  -15,  80,   -55,  -56,  24,
+    -118, -74,  74,   103,  -20,  64,   -92,  100,  23,   -78,  -82,  -109, 80,   -42,  4,    98,   -67,  4,    -75,
+    110,  -99,  -48,  58,   44,   -103, 72,   86,   -42,  56,   41,   40,   -47,  -93,  -50,  -78,  -107, 6,    -56,
+    -52,  20,   59,   -111, -63,  116,  -84,  -68,  38,   74,   -47,  -54,  51,   -62,  107,  116,  65,   -48,  101,
+    -64,  -3,   124,  92,   67,   -43,  121,  125,  57,   -29,  -3,   67,   -44,  45,   123,  125,  -22,  43,   96,
+    -30,  -127, -71,  89,   64,   33,   -44,  20,   68,   124,  9,    -64,  -68,  -90,  97,   -125, -20,  -54,  72,
+    -10,  118,  15,   57,   -90,  -60,  -9,   108,  80,   97,   54,   67,   -53,  74,   -127, -33,  -7,   -84,  -88,
+    92,   29,   -67,  -102, -116, 18,   -82,  -127, 3,    104,  83,   -45,  -14,  -5,   -54,  121,  105,  114,  -18,
+    -69,  -63,  98,   35,   -123, -58,  58,   82,   -77,  81,   97,   -123, -42,  71,   -77,  124,  -93,  16,   -57,
+    90,   -125, 94,   -26,  56,   101,  -40,  -97,  99,   77,   95,   -88,  -9,   77,   -4,   45,   83,   67,   115,
+    -80,  -20,  -110, -75,  48,   98,   78,   52,   51,   13,   -96,  -48,  -35,  3,    121,  -100, 41,   13,   112,
+    -30,  -35,  -107, -91,  -109, 82,   107,  65,   93,   -19,  -58,  98,   -36,  90,   -89,  -36,  -76,  -7,   57,
+    67,   -126, 71,   1,    -28,  -57,  35,   -42,  60,   22,   101,  -38,  57,   89,   -43,  18,   -8,   39,   66,
+    -128, 76,   -45,  29,   -52,  121,  -38,  22,   -94,  121,  48,   86,   102,  -58,  -105, -31,  100,  115,  -118,
+    119,  50,   -99,  42,   -97,  -62,  1,    71,   83,   -6,   95,   -119, 41,   -127, -100, 28,   -99,  -20,  55,
+    -13,  9,    74,   -14,  -38,  -71,  -21,  -32,  94,   92,   -72,  -86,  -2,   -9,   121,  78,   97,   109,  -78,
+    73,   -14,  -97,  -104, -88,  -41,  115,  -32,  -73,  51,   -58,  105,  -101, -62,  120,  -46,  -53,  27,   -17,
+    -70,  -25,  -61,  73,   56,   29,   34,   -91,  100,  92,   121,  -23,  54,   -49,  122,  -113, 96,   33,   25,
+    -1,   45,   -53,  -42,  -96,  -27,  -23,  56,   93,   24,   54,   39,   15,   11,   -103, 50,   -82,  -102, 80,
+    -83,  -22,  93,   32,   75,   118,  -94,  87,   52,   -38,  -87,  124,  -36,  -77,  40,   -33,  121,  -3,   3,
+    33,   74,   98,   95,   24,   32,   32,   -91,  15,   126,  21,   48,   -73,  126,  5,    42,   -113, 9,    -89,
+    -66,  -61,  107,  30,   13,   21,   -82,  63,   -71,  -30,  -29,  51,   99,   -63,  -52,  -75,  -122, 56,   69,
+    -30,  -66,  -110, -73,  -126, 110,  14,   -78,  -37,  -29,  -120, -72,  -125, 47,   103,  -88,  49,   -27,  109,
+    72,   37,   -3,   -5,   -66,  -101, -60,  -66,  36,   -68,  2,    -46,  -42,  73,   4,    -91,  -69,  -116, 123,
+    48,   78,   -47,  -86,  -126, 85,   19,   -78,  -65,  -11,  37,   66,   -122, -32,  -76,  12,   59,   -58,  43,
+    -66,  53,   -102, -78,  -46,  -46,  7,    27,   -109, -39,  -66,  84,   115,  60,   -104, 38,   -95,  115,  93,
+    47,   118,  79,   81,   77,   -125, 28,   -25,  -106, 57,   -78,  15,   94,   7,    -92,  108,  39,   -98,  57,
+    69,   36,   8,    -17,  18,   -114, -26,  -84,  11,   71,   -118, 31,   -23,  41,   25,   -61,  113,  -88,  -47,
+    24,   -118, 111,  -98,  -24,  84,   12,   112,  3,    65,   32,   53,   -127, -14,  83,   -24,  113,  124,  70,
+    -111, 89,   64,   -109, -69,  10,   120,  101,  12,   48,   -76,  -119, 64,   126,  -121, 74,   82,   44,   -119,
+    -37,  100,  78,   87,   -96,  -26,  -12,  21,   71,   10,   20,   -106, -66,  33,   0,    -4,   -127, 73,   -100,
+    115,  -109, 100,  -52,  -89,  71,   5,    103,  48,   -99,  -123, 48,   -120, 76,   -118, -24,  -68,  -60,  20,
+    23,   -46,  -104, 55,   121,  63,   122,  15,   -10,  -90,  -99,  34,   74,   -44,  40,   -104, -75,  26,   122,
+    -34,  -127, -93,  82,   108,  81,   -112, -72,  59,   20,   60,   14,   31,   -47,  -40,  -17,  -47,  -126, 100,
+    91,   105,  -120, 84,   -111, -97,  -83,  -23,  -91,  72,   -86,  106,  69,   -80,  -17,  -91,  -117, 24,   -128,
+    61,   -27,  -52,  -116, 43,   -40,  -117, -72,  125,  38,   34,   98,   115,  116,  -46,  99,   -105, 96,   -58,
+    84,   -57,  -59,  -85,  122,  -42,  71,   56,   -43,  59,   -6,   59,   -4,   70,   76,   -99,  -110, -88,  34,
+    -107, -33,  -102, -79,  -127, 95,   12,   94,   4,    -2,   81,   66,   68,   -10,  86,   -24,  -19,  70,   -110,
+    49,   -1,   -7,   71,   -9,   -48,  -96,  10,   -69,  -73,  -22,  37,   100,  117,  -4,   -36,  112,  -109, 74,
+    16,   40,   -98,  -122, 5,    -87,  -8,   -5,   64,   17,   -82,  -119, 12,   123,  -65,  122,  -35,  -122, -52,
+    115,  21,   -3,   -74,  -11,  -79,  24,   -35,  -108, -79,  -9,   10,   76,   16,   123,  -2,   -27,  78,   11,
+    -99,  -67,  -125, 73,   -18,  -63,  -3,   -107, 32,   26,   -19,  -59,  19,   -80,  120,  -45,  88,   -42,  -84,
+    92,   -82,  -80,  126,  50,   -45,  100,  90,   11,   41,   -81,  79,   123,  41,   -99,  -59,  5,    29,   -118,
+    80,   -68,  118,  81,   125,  -70,  115,  -59,  -109, 76,   -4,   75,   70,   37,   117,  -29,  -53,  -100, -30,
+    81,   92,   35,   94,   18,   -23,  90,   7,    3,    93,   117,  -29,  83,   100,  46,   -63,  111,  111,  -104,
+    -3,   -9,   87,   45,   -100, -122, -99,  71,   22,   -41,  -22,  -27,  26,   -20,  62,   71,   111,  9,    -30,
+    44,   94,   -67,  -114, 3,    103,  67,   10,   114,  -2,   24,   -63,  122,  -65,  20,   -83,  27,   18,   37,
+    34,   -64,  -90,  -18,  -77,  -106, 1,    -10,  50,   91,   38,   -122, -78,  4,    -110, -15,  -26,  -27,  49,
+    -93,  -67,  47,   18,   41,   -4,   -108, -47,  -76,  -109, 112,  75,   60,   -97,  -75,  -89,  74,   -98,  -57,
+    -23,  -112, -97,  -85,  -60,  35,   -21,  5,    -7,   -56,  3,    -85,  70,   -53,  88,   -77,  79,   74,   116,
+    92,   -97,  98,   39,   72,   -127, 38,   103,  77,   76,   -111, 95,   58,   75,   -64,  -71,  47,   26,   81,
+    123,  58,   -24,  -18,  -79,  62,   40,   -78,  124,  -117, -108, 7,    7,    58,   -22,  -14,  19,   11,   57,
+    56,   -66,  42,   67,   8,    80,   2,    37,   -72,  -94,  -54,  117,  109,  -26,  94,   -97,  95,   15,   105,
+    41,   54,   59,   34,   67,   123,  112,  -101, -35,  -10,  -123, -22,  24,   36,   46,   -28,  -28,  -56,  -24,
+    -85,  -48,  -34,  -20,  -122, -26,  2,    123,  -13,  -12,  6,    126,  -59,  4,    6,    58,   44,   -113, 117,
+    -94,  11,   -52,  -2,   -46,  -14,  125,  -46,  -93,  58,   46,   22,   -9,   100,  26,   -50,  -26,  39,   -111,
+    -83,  113,  77,   -125, -63,  92,   114,  -96,  -81,  1,    108,  -38,  33,   -53,  27,   50,   -127, 99,   33,
+    3,    -58,  -122, 25,   -52,  -89};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_mult_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_1_x_n_3_output_mult[1] = {1283345963};
+const int32_t conv_1_x_n_3_output_mult[1] = {1613642951};

+ 9 - 9
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_ref_data.h

@@ -1,13 +1,13 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
 const int8_t conv_1_x_n_3_output_ref[148] = {
-    -33, 6,   -16, -25, 71,  31,  67,  100, 2,   -19, 37,  22,  4,   -23, -29, 64,  12,   -32, 106,  83,  -37, 58,
-    1,   44,  -11, -23, -31, -93, -70, 40,  -72, -28, 3,   -33, 18,  -93, 18,  60,  36,   -39, -22,  78,  24,  20,
-    -47, 37,  29,  -16, 73,  -50, -74, -83, -67, -72, -28, 35,  -39, -60, 7,   -53, -7,   -22, 20,   70,  12,  -2,
-    -76, -75, -21, -60, -14, -71, 3,   72,  14,  19,  -16, 61,  18,  39,  12,  41,  -9,   -66, -111, -31, -43, -3,
-    -30, 27,  -40, -26, 8,   13,  -42, 124, -2,  -8,  -18, -45, -3,  -5,  50,  -77, -4,   15,  -87,  -28, 30,  -65,
-    35,  -16, -31, -28, -30, 26,  -64, -38, -21, -1,  19,  -15, 54,  -40, -13, -45, -111, -5,  -59,  -22, 80,  2,
-    8,   -26, -49, 28,  -41, 70,  12,  20,  54,  -18, 64,  43,  54,  89,  71,  83};
+    -6, -8,  28,  20,  -21, -26, 23,  3,   -15, -12, 9,   37,  25, 57,  -8,  9,  -12, 54,  -12, -23, 75,  -47,
+    25, 48,  -13, -28, 52,  19,  -1,  -30, -29, -28, 1,   4,   27, 68,  32,  -5, -21, -19, -6,  22,  22,  42,
+    24, -18, -20, -25, 6,   23,  29,  21,  17,  5,   10,  41,  22, -44, 49,  3,  -4,  61,  56,  38,  34,  -25,
+    36, 41,  42,  37,  22,  0,   -12, -24, -37, 28,  -14, -17, 17, -29, 0,   68, 5,   39,  75,  -42, -32, 81,
+    1,  -45, 83,  -37, 39,  73,  -28, -20, 29,  12,  11,  1,   25, -13, 57,  15, 74,  -20, -2,  -34, 32,  31,
+    28, -32, 27,  -17, 10,  13,  15,  20,  26,  56,  16,  36,  7,  55,  -43, 5,  -18, -4,  -1,  26,  -28, 30,
+    13, 18,  25,  55,  -12, -5,  -40, 17,  18,  57,  103, 107, 94, 69,  107, 121};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/output_shift_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_1_x_n_3_output_shift[1] = {-10};
+const int32_t conv_1_x_n_3_output_shift[1] = {-11};

+ 2 - 2
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/test_data.h

@@ -1,5 +1,5 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #include "biases_data.h"
 #include "config_data.h"
 #include "input_data.h"

+ 13 - 12
Tests/UnitTest/TestCases/TestData/conv_1_x_n_3/weights_data.h

@@ -1,16 +1,17 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision upstream/v2.10.0-0-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
 const int8_t conv_1_x_n_3_weights[192] = {
-    -79,  -73,  -34,  -10,  -49,  5,   72,  -122, -41,  -61, -111, -110, -24, -31, -13,  18,  100,  -104, 113, -39,
-    64,   81,   -101, -55,  73,   -99, 12,  -14,  -86,  -42, 95,   -110, -90, -39, 84,   -77, 63,   -48,  29,  -66,
-    -26,  -111, 93,   16,   47,   102, -77, -67,  28,   63,  40,   50,   88,  106, -55,  21,  -95,  79,   83,  31,
-    114,  11,   -75,  63,   75,   -58, -43, 107,  104,  -2,  -23,  81,   -67, -47, -58,  112, 30,   29,   72,  68,
-    121,  -110, 70,   48,   -55,  81,  107, 12,   40,   99,  47,   101,  -10, -27, -29,  53,  -24,  119,  69,  -64,
-    -74,  71,   -68,  -90,  15,   79,  115, 3,    109,  -91, -16,  3,    -77, -37, 34,   -32, -16,  103,  93,  -26,
-    82,   -122, -59,  -20,  22,   -7,  -60, -64,  -94,  -34, -29,  9,    -94, -98, -103, 112, 55,   43,   55,  24,
-    -116, -124, -61,  -123, -49,  99,  -91, -83,  -110, 124, -107, 13,   118, 122, -88,  -72, -114, 117,  -19, 95,
-    -54,  108,  -62,  -38,  -105, 46,  12,  33,   50,   -89, 36,   -116, 109, -59, 46,   60,  -43,  -56,  38,  16,
-    -122, -73,  8,    -18,  -127, -36, -73, -88,  122,  63,  -23,  80};
+    123,  117,  -100, 124,  123,  -38,  97,   83,   90,   0,    97,   -15, 28,   80,   -125, -29,  20,  50,
+    -89,  15,   125,  -64,  87,   50,   31,   -74,  -18,  -37,  -5,   73,  36,   5,    -61,  -113, 37,  -37,
+    -8,   -20,  90,   7,    64,   -112, -123, 112,  59,   102,  -102, -67, -85,  80,   -76,  98,   37,  -43,
+    11,   14,   -24,  -57,  -106, -91,  -68,  35,   -3,   119,  -92,  64,  -122, -104, 92,   58,   -59, -117,
+    95,   -46,  79,   10,   64,   -63,  77,   31,   49,   -55,  -38,  -84, -113, -122, 51,   16,   67,  85,
+    -94,  28,   67,   55,   97,   -46,  -58,  17,   -65,  37,   69,   -99, 100,  -30,  -65,  -44,  92,  49,
+    -92,  16,   64,   -30,  -121, -105, -24,  -115, 73,   104,  -93,  -98, -116, -3,   -115, 27,   18,  30,
+    -32,  118,  -125, 91,   103,  -82,  -113, -5,   -125, -119, 64,   85,  -41,  -98,  -83,  -78,  -98, -88,
+    -13,  -104, -63,  -16,  -127, 23,   9,    88,   -93,  -64,  -82,  20,  -100, -53,  22,   53,   62,  -46,
+    -117, -115, -72,  106,  54,   -124, -84,  -119, -71,  -25,  108,  56,  57,   -23,  19,   -19,  105, -127,
+    -31,  114,  28,   -106, -125, 110,  116,  -21,  113,  -72,  -1,   -15};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_out_activation/biases_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_out_activation_biases[2] = {-28638, -5746};
+const int32_t conv_out_activation_biases[2] = {5171, -13428};

+ 5 - 5
Tests/UnitTest/TestCases/TestData/conv_out_activation/config_data.h

@@ -1,12 +1,12 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #define CONV_OUT_ACTIVATION_OUT_CH 2
-#define CONV_OUT_ACTIVATION_IN_CH 2
+#define CONV_OUT_ACTIVATION_IN_CH 4
 #define CONV_OUT_ACTIVATION_INPUT_W 3
 #define CONV_OUT_ACTIVATION_INPUT_H 3
 #define CONV_OUT_ACTIVATION_DST_SIZE 18
-#define CONV_OUT_ACTIVATION_INPUT_SIZE 18
+#define CONV_OUT_ACTIVATION_INPUT_SIZE 36
 #define CONV_OUT_ACTIVATION_OUT_ACTIVATION_MIN -61
 #define CONV_OUT_ACTIVATION_OUT_ACTIVATION_MAX 107
 #define CONV_OUT_ACTIVATION_INPUT_BATCHES 1
@@ -19,6 +19,6 @@
 #define CONV_OUT_ACTIVATION_OUTPUT_W 3
 #define CONV_OUT_ACTIVATION_OUTPUT_H 3
 #define CONV_OUT_ACTIVATION_INPUT_OFFSET 128
-#define CONV_OUT_ACTIVATION_OUTPUT_OFFSET 127
+#define CONV_OUT_ACTIVATION_OUTPUT_OFFSET -22
 #define CONV_OUT_ACTIVATION_DILATION_X 1
 #define CONV_OUT_ACTIVATION_DILATION_Y 1

+ 5 - 4
Tests/UnitTest/TestCases/TestData/conv_out_activation/input_data.h

@@ -1,7 +1,8 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int8_t conv_out_activation_input[18] =
-    {23, -12, -1, -29, 63, -36, 83, -71, -26, 66, 56, -37, -6, -95, -18, 3, -23, 21};
+const int8_t conv_out_activation_input[36] = {125, 105, -111, -15, -68,  114, 28,  -72, 85,  54,  70,  -117,
+                                              -26, 47,  77,   -24, -106, -28, 59,  33,  -77, -46, 101, 8,
+                                              14,  -55, -63,  -73, 12,   120, -67, -93, 79,  33,  48,  -22};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_out_activation/output_mult_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_out_activation_output_mult[2] = {1841065486, 1886711715};
+const int32_t conv_out_activation_output_mult[2] = {1682042536, 1738581862};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_out_activation/output_ref_data.h

@@ -1,7 +1,7 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
 const int8_t conv_out_activation_output_ref[18] =
-    {74, 66, 33, 53, 89, 92, 36, 103, -61, 4, 48, 67, -9, 107, -19, 86, 22, 77};
+    {7, -61, 27, -61, 35, -46, 11, -39, 86, -46, 27, -61, -44, -61, -28, -61, -13, -61};

+ 3 - 3
Tests/UnitTest/TestCases/TestData/conv_out_activation/output_shift_data.h

@@ -1,6 +1,6 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int32_t conv_out_activation_output_shift[2] = {-9, -9};
+const int32_t conv_out_activation_output_shift[2] = {-10, -10};

+ 2 - 2
Tests/UnitTest/TestCases/TestData/conv_out_activation/test_data.h

@@ -1,5 +1,5 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #include "biases_data.h"
 #include "config_data.h"
 #include "input_data.h"

+ 7 - 5
Tests/UnitTest/TestCases/TestData/conv_out_activation/weights_data.h

@@ -1,8 +1,10 @@
-// Generated by generate_test_data.py using tensorflow version 2.10.0 (Keras version 2.10.0).
-// Interpreter from tensorflow version 2.10.0 and revision v2.10.0-rc3-6-g359c3cdfc5f.
+// Generated by generate_test_data.py using tensorflow version 2.11.0 (Keras version 2.11.0).
+// Interpreter from tensorflow version 2.11.0 and revision v2.11.0-rc2-17-gd5b57ca93e5.
 #pragma once
 #include <stdint.h>
 
-const int8_t conv_out_activation_weights[36] = {-4,  -65, -85, -9, -108, -106, -17,  17,  93,  -97, -127, 23,
-                                                -81, 42,  -12, 0,  29,   40,   -90,  -74, -10, -42, 74,   -104,
-                                                -58, 74,  17,  5,  -58,  123,  -127, 59,  -90, -5,  16,   -104};
+const int8_t conv_out_activation_weights[72] = {
+    10,  -29, -33,  73,   -93, 42, -59,  64,   26, 69,  -20, -115, -118, 70,  127, 80,  16,  -19,
+    -46, 58,  -49,  -58,  116, 36, 69,   80,   67, 121, 89,  -29,  50,   -81, 82,  -20, 76,  29,
+    6,   -8,  -127, -14,  28,  93, -123, -45,  14, 46,  -60, 22,   -98,  43,  101, 56,  -52, -87,
+    -68, -32, 77,   -108, 37,  99, 41,   -127, -3, 115, -70, -78,  75,   -96, 6,   40,  -95, 54};

+ 3 - 5
Tests/UnitTest/generate_test_data.py

@@ -2088,8 +2088,7 @@ def load_testdata_sets() -> dict:
                                           stride_y=1,
                                           pad=True,
                                           out_activation_min=-88,
-                                          out_activation_max=127)    
-
+                                          out_activation_max=127)
     dataset = 'conv_1_x_n_5'
     testdata_sets[dataset] = ConvSettings(dataset,
                                           type_of_test,
@@ -2107,8 +2106,7 @@ def load_testdata_sets() -> dict:
                                           stride_y=1,
                                           pad=True,
                                           out_activation_min=-88,
-                                          out_activation_max=127)                                              
-
+                                          out_activation_max=127)
     dataset = 'conv_2'
     testdata_sets[dataset] = ConvSettings(dataset,
                                           type_of_test,
@@ -2171,7 +2169,7 @@ def load_testdata_sets() -> dict:
                                           regenerate_input,
                                           regenerate_biases,
                                           schema_file,
-                                          in_ch=2,
+                                          in_ch=4,
                                           out_ch=2,
                                           x_in=3,
                                           y_in=3,