Kaynağa Gözat

Correct buffer size for depthwise conv wrapper (#23)

BUG=https://github.com/ARM-software/CMSIS-NN/issues/19
Måns Nilsson 3 yıl önce
ebeveyn
işleme
2e4abf537d

+ 11 - 5
Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c

@@ -22,8 +22,8 @@
  * Description:  Wrapper API to select appropriate depthwise conv API based
  *               on dimensions.
  *
- * $Date:        26 October 2022
- * $Revision:    V.2.0.1
+ * $Date:        23 November 2022
+ * $Revision:    V.2.0.2
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -63,8 +63,8 @@ arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
         dw_conv_params->dilation.h == 1)
     {
 #if !defined(ARM_MATH_MVEI)
-        if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1) &&
-            (dw_conv_params->padding.w <= 1))
+        if (filter_dims->w == 3 && filter_dims->h == 3 && dw_conv_params->padding.h <= 1 &&
+            dw_conv_params->padding.w <= 1)
         {
             status = arm_depthwise_conv_3x3_s8(ctx,
                                                dw_conv_params,
@@ -118,12 +118,18 @@ int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_par
                                                       const cmsis_nn_dims *filter_dims,
                                                       const cmsis_nn_dims *output_dims)
 {
-    (void)dw_conv_params;
     int32_t size = 0;
 
     if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
         dw_conv_params->dilation.h == 1)
     {
+#if !defined(ARM_MATH_MVEI)
+        if (filter_dims->w == 3 && filter_dims->h == 3 && dw_conv_params->padding.h <= 1 &&
+            dw_conv_params->padding.w <= 1)
+        {
+            return size;
+        }
+#endif
         size = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
     }
 

+ 48 - 0
Tests/UnitTest/TestCases/Utils/utils.h

@@ -0,0 +1,48 @@
+/*
+ * SPDX-FileCopyrightText: Copyright 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <stdint.h>
+
+static inline const int32_t *get_bias_address(const int32_t *bias, int32_t size)
+{ /* Returns bias when any of its first size entries is non-zero, otherwise NULL. */
+    const int32_t *return_bias = NULL; /* Stays NULL when no non-zero entry is found (including size <= 0). NOTE(review): NULL is used but this header includes only <stdint.h>; confirm every includer provides NULL. */
+    for (int i = 0; i < size; i++)
+    {
+        if (bias[i] != 0)
+        {
+            return_bias = bias; /* The array start is returned, not the address of the non-zero entry. */
+            break;
+        }
+    }
+    return return_bias;
+}
+
+static inline const int64_t *get_bias_s64_address(const int64_t *bias, int32_t size)
+{ /* 64-bit counterpart of get_bias_address: returns bias when any of its first size entries is non-zero, otherwise NULL. */
+    const int64_t *return_bias = NULL; /* Stays NULL when no non-zero entry is found (including size <= 0). */
+    for (int i = 0; i < size; i++)
+    {
+        if (bias[i] != 0)
+        {
+            return_bias = bias; /* The array start is returned, not the address of the non-zero entry. */
+            break;
+        }
+    }
+    return return_bias;
+}

+ 2 - 0
Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/Unity/unity_test_arm_depthwise_conv_3x3_s8.c

@@ -52,3 +52,5 @@ void test_depthwise_kernel_3x3_arm_depthwise_conv_3x3_null_bias_s8(void)
 {
     depthwise_kernel_3x3_null_bias_arm_depthwise_conv_3x3_null_bias_s8();
 }
+
+void test_stride2pad1_arm_depthwise_conv_3x3_s8(void) { stride2pad1_arm_depthwise_conv_3x3_s8(); } /* Test entry point; only forwards to the stride2pad1 test body. */

+ 131 - 18
Tests/UnitTest/TestCases/test_arm_depthwise_conv_3x3_s8/test_arm_depthwise_conv_3x3_s8.c

@@ -22,22 +22,10 @@
 
 #include "../TestData/depthwise_kernel_3x3/test_data.h"
 #include "../TestData/depthwise_kernel_3x3_null_bias/test_data.h"
+#include "../TestData/stride2pad1/test_data.h"
+#include "../Utils/utils.h"
 #include "../Utils/validate.h"
 
-const int32_t *get_bias_address(const int32_t *bias, int32_t size)
-{
-    const int32_t *return_bias = NULL;
-    for (int i = 0; i < size; i++)
-    {
-        if (bias[i] != 0)
-        {
-            return_bias = bias;
-            break;
-        }
-    }
-    return return_bias;
-}
-
 void depthwise_kernel_3x3_arm_depthwise_conv_3x3_s8(void)
 {
     const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
@@ -109,8 +97,16 @@ void depthwise_kernel_3x3_arm_depthwise_conv_3x3_s8(void)
 
     const int32_t buf_size =
         arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+#if defined(ARM_MATH_MVEI)
+    TEST_ASSERT_TRUE(buf_size > 0);
+#else
+    TEST_ASSERT_EQUAL(buf_size, 0);
+#endif
+
     ctx.buf = malloc(buf_size);
-    ctx.size = 0;
+    ctx.size = buf_size;
+
     result = arm_depthwise_conv_wrapper_s8(&ctx,
                                            &dw_conv_params,
                                            &quant_params,
@@ -201,8 +197,16 @@ void depthwise_kernel_3x3_arm_depthwise_conv_3x3_1_s8(void)
     const arm_cmsis_nn_status expected_wrapper = ARM_CMSIS_NN_SUCCESS;
     const int32_t buf_size =
         arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    // Negative test for the 3x3 kernel: the wrapper is not expected to select the 3x3 variant here.
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(buf_size > 0);
+#else
+    TEST_ASSERT_EQUAL(buf_size, 0);
+#endif
+
     ctx.buf = malloc(buf_size);
-    ctx.size = 0;
+    ctx.size = buf_size;
 
     result = arm_depthwise_conv_wrapper_s8(&ctx,
                                            &dw_conv_params,
@@ -296,8 +300,15 @@ void depthwise_kernel_3x3_null_bias_arm_depthwise_conv_3x3_null_bias_s8(void)
     const arm_cmsis_nn_status expected_wrapper = ARM_CMSIS_NN_SUCCESS;
     const int32_t buf_size =
         arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+#if defined(ARM_MATH_MVEI)
+    TEST_ASSERT_TRUE(buf_size > 0);
+#else
+    TEST_ASSERT_EQUAL(buf_size, 0);
+#endif
+
     ctx.buf = malloc(buf_size);
-    ctx.size = 0;
+    ctx.size = buf_size;
 
     result = arm_depthwise_conv_wrapper_s8(&ctx,
                                            &dw_conv_params,
@@ -318,4 +329,106 @@ void depthwise_kernel_3x3_null_bias_arm_depthwise_conv_3x3_null_bias_s8(void)
     }
     TEST_ASSERT_EQUAL(expected_wrapper, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_kernel_3x3_null_bias_output_ref, output_ref_size));
-}
+}
+
+void stride2pad1_arm_depthwise_conv_3x3_s8(void)
+{ /* Runs the stride2pad1 data through arm_depthwise_conv_3x3_s8 directly, then through arm_depthwise_conv_wrapper_s8; both outputs are validated against stride2pad1_output_ref. */
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int8_t output[STRIDE2PAD1_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_dw_conv_params dw_conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims; /* Only w and h are assigned below. */
+    cmsis_nn_dims bias_dims = {}; /* Empty-initialised; no field is assigned later in this test. */
+    cmsis_nn_dims output_dims; /* Only w, h and c are assigned below; n is left unset. */
+
+    const int32_t output_ref_size = STRIDE2PAD1_DST_SIZE;
+    const int32_t *bias_data = get_bias_address(stride2pad1_biases, STRIDE2PAD1_OUT_CH); /* NULL when every bias value is zero. */
+    const int8_t *kernel_data = stride2pad1_weights;
+    const int8_t *input_data = stride2pad1_input;
+
+    input_dims.n = STRIDE2PAD1_INPUT_BATCHES;
+    input_dims.w = STRIDE2PAD1_INPUT_W;
+    input_dims.h = STRIDE2PAD1_INPUT_H;
+    input_dims.c = STRIDE2PAD1_IN_CH;
+    filter_dims.w = STRIDE2PAD1_FILTER_X;
+    filter_dims.h = STRIDE2PAD1_FILTER_Y;
+    output_dims.w = STRIDE2PAD1_OUTPUT_W;
+    output_dims.h = STRIDE2PAD1_OUTPUT_H;
+    output_dims.c = STRIDE2PAD1_OUT_CH;
+
+    dw_conv_params.padding.w = STRIDE2PAD1_PAD_X;
+    dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
+    dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
+    dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
+    dw_conv_params.dilation.w = STRIDE2PAD1_DILATION_X;
+    dw_conv_params.dilation.h = STRIDE2PAD1_DILATION_Y;
+
+    dw_conv_params.ch_mult = 1; /* Channel multiplier is a literal 1 here, not a STRIDE2PAD1_* macro. */
+
+    dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
+    dw_conv_params.output_offset = STRIDE2PAD1_OUTPUT_OFFSET;
+    dw_conv_params.activation.min = STRIDE2PAD1_OUT_ACTIVATION_MIN;
+    dw_conv_params.activation.max = STRIDE2PAD1_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)stride2pad1_output_mult; /* NOTE(review): cast presumably drops const from the test data - confirm. */
+    quant_params.shift = (int32_t *)stride2pad1_output_shift;
+
+    ctx.buf = NULL; /* The direct 3x3 call is made without a scratch buffer. */
+    ctx.size = 0;
+
+    arm_cmsis_nn_status result = arm_depthwise_conv_3x3_s8(&ctx,
+                                                           &dw_conv_params,
+                                                           &quant_params,
+                                                           &input_dims,
+                                                           input_data,
+                                                           &filter_dims,
+                                                           kernel_data,
+                                                           &bias_dims,
+                                                           bias_data,
+                                                           &output_dims,
+                                                           output);
+
+    if (ctx.buf) /* ctx.buf was set to NULL above, so this cleanup runs only if the call changed it. */
+    {
+        memset(ctx.buf, 0, ctx.size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, stride2pad1_output_ref, output_ref_size));
+
+    memset(output, 0, sizeof(output)); /* Reset output so the wrapper run is validated on freshly written data. */
+
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+#if defined(ARM_MATH_MVEI)
+    TEST_ASSERT_TRUE(buf_size > 0); /* With ARM_MATH_MVEI a non-zero scratch size is expected. */
+#else
+    TEST_ASSERT_EQUAL(buf_size, 0); /* Otherwise no scratch buffer is expected for this case. */
+#endif
+
+    ctx.buf = malloc(buf_size); /* Result is not checked; malloc(0) may return NULL or a unique pointer. */
+    ctx.size = buf_size;
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, buf_size); /* Clear the scratch buffer before releasing it. */
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, stride2pad1_output_ref, output_ref_size));
+}

+ 37 - 23
Tests/UnitTest/TestCases/test_arm_depthwise_conv_fast_s16/test_arm_depthwise_conv_fast_s16.c

@@ -28,22 +28,9 @@
 #include "../TestData/dw_int16xint8_fast_stride/test_data.h"
 #include "../TestData/dw_int16xint8_fast_stride_null_bias/test_data.h"
 #include "../TestData/dw_int16xint8_fast_test_bias/test_data.h"
+#include "../Utils/utils.h"
 #include "../Utils/validate.h"
 
-const int64_t *get_bias_s64_address(const int64_t *bias, int32_t size)
-{
-    const int64_t *return_bias = NULL;
-    for (int i = 0; i < size; i++)
-    {
-        if (bias[i] != 0)
-        {
-            return_bias = bias;
-            break;
-        }
-    }
-    return return_bias;
-}
-
 void dw_int16xint8_fast_arm_depthwise_conv_fast_s16(void)
 {
     const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
@@ -114,7 +101,10 @@ void dw_int16xint8_fast_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -207,7 +197,10 @@ void dw_int16xint8_fast_spill_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -300,7 +293,10 @@ void dw_int16xint8_fast_stride_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -394,7 +390,10 @@ void dw_int16xint8_fast_null_bias_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -488,7 +487,10 @@ void dw_int16xint8_fast_stride_null_bias_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -582,7 +584,10 @@ void dw_int16xint8_fast_spill_null_bias_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -676,7 +681,10 @@ void dw_int16xint8_fast_test_bias_arm_depthwise_conv_fast_s16(void)
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -770,7 +778,10 @@ void dw_int16xint8_fast_multiple_batches_uneven_buffers_arm_depthwise_conv_fast_
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -865,7 +876,10 @@ void dw_int16xint8_fast_multiple_batches_uneven_buffers_null_bias_arm_depthwise_
     TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
     memset(output, 0, sizeof(output));
 
-    buf_size = arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(
+        buf_size,
+        arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims));
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,

+ 9 - 0
Tests/UnitTest/TestCases/test_arm_depthwise_conv_s16/test_arm_depthwise_conv_s16.c

@@ -95,6 +95,9 @@ void dw_int16xint8_arm_depthwise_conv_s16(void)
 
     int buf_size =
         arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -189,6 +192,9 @@ void dw_int16xint8_dilation_arm_depthwise_conv_s16(void)
 
     int buf_size =
         arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,
@@ -283,6 +289,9 @@ void dw_int16xint8_mult4_arm_depthwise_conv_s16(void)
 
     int buf_size =
         arm_depthwise_conv_wrapper_s16_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
     ctx.buf = malloc(buf_size);
 
     result = arm_depthwise_conv_wrapper_s16(&ctx,

+ 0 - 10
Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c

@@ -44,20 +44,10 @@ void setUp(void)
  */
 void tearDown(void) {}
 
-void test_basic_arm_depthwise_conv_s8(void) { basic_arm_depthwise_conv_s8(); }
-
-void test_stride2pad1_arm_depthwise_conv_s8(void) { stride2pad1_arm_depthwise_conv_s8(); }
-
 void test_depthwise_2_arm_depthwise_conv_s8(void) { depthwise_2_arm_depthwise_conv_s8(); }
 
-void test_depthwise_out_activation_arm_depthwise_conv_s8(void) { depthwise_out_activation_arm_depthwise_conv_s8(); }
-
 void test_depthwise_mult_batches_arm_depthwise_conv_s8(void) { depthwise_mult_batches_arm_depthwise_conv_s8(); }
 
-void test_depthwise_null_bias_0_arm_depthwise_conv_s8(void) { depthwise_null_bias_0_arm_depthwise_conv_s8(); }
-
 void test_depthwise_null_bias_1_arm_depthwise_conv_s8(void) { depthwise_null_bias_1_arm_depthwise_conv_s8(); }
 
 void test_depthwise_dilation_arm_depthwise_conv_s8(void) { depthwise_dilation_arm_depthwise_conv_s8(); }
-
-void test_depthwise_x_stride_arm_depthwise_conv_s8(void) { depthwise_x_stride_arm_depthwise_conv_s8(); }

+ 56 - 447
Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c

@@ -19,217 +19,13 @@
 #include <arm_nnfunctions.h>
 #include <unity.h>
 
-#include "../TestData/basic/test_data.h"
 #include "../TestData/depthwise_2/test_data.h"
 #include "../TestData/depthwise_dilation/test_data.h"
 #include "../TestData/depthwise_mult_batches/test_data.h"
-#include "../TestData/depthwise_null_bias_0/test_data.h"
 #include "../TestData/depthwise_null_bias_1/test_data.h"
-#include "../TestData/depthwise_out_activation/test_data.h"
-#include "../TestData/depthwise_x_stride/test_data.h"
-#include "../TestData/stride2pad1/test_data.h"
+#include "../Utils/utils.h"
 #include "../Utils/validate.h"
 
-const int32_t *get_bias_address(const int32_t *bias, int32_t size)
-{
-    const int32_t *return_bias = NULL;
-    for (int i = 0; i < size; i++)
-    {
-        if (bias[i] != 0)
-        {
-            return_bias = bias;
-            break;
-        }
-    }
-    return return_bias;
-}
-
-void basic_arm_depthwise_conv_s8(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[BASIC_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims = {};
-    cmsis_nn_dims output_dims;
-
-    const int32_t *bias_data = get_bias_address(basic_biases, BASIC_OUT_CH);
-    const int8_t *input_data = basic_input;
-
-    const int32_t output_ref_size = BASIC_DST_SIZE;
-    input_dims.n = BASIC_INPUT_BATCHES;
-    input_dims.w = BASIC_INPUT_W;
-    input_dims.h = BASIC_INPUT_H;
-    input_dims.c = BASIC_IN_CH;
-    filter_dims.w = BASIC_FILTER_X;
-    filter_dims.h = BASIC_FILTER_Y;
-    output_dims.w = BASIC_OUTPUT_W;
-    output_dims.h = BASIC_OUTPUT_H;
-    output_dims.c = BASIC_OUT_CH;
-
-    dw_conv_params.padding.w = BASIC_PAD_X;
-    dw_conv_params.padding.h = BASIC_PAD_Y;
-    dw_conv_params.stride.w = BASIC_STRIDE_X;
-    dw_conv_params.stride.h = BASIC_STRIDE_Y;
-    dw_conv_params.dilation.w = BASIC_DILATION_X;
-    dw_conv_params.dilation.h = BASIC_DILATION_Y;
-
-    dw_conv_params.ch_mult = 1;
-
-    dw_conv_params.input_offset = BASIC_INPUT_OFFSET;
-    dw_conv_params.output_offset = BASIC_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = BASIC_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = BASIC_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)basic_output_mult;
-    quant_params.shift = (int32_t *)basic_output_shift;
-
-    ctx.buf = NULL;
-    ctx.size = 0;
-
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx,
-                                                       &dw_conv_params,
-                                                       &quant_params,
-                                                       &input_dims,
-                                                       input_data,
-                                                       &filter_dims,
-                                                       basic_weights,
-                                                       &bias_dims,
-                                                       bias_data,
-                                                       &output_dims,
-                                                       output);
-
-    if (ctx.buf)
-    {
-        // The caller is responsible to clear the scratch buffers for security reasons if applicable.
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, basic_output_ref, output_ref_size));
-    memset(output, 0, sizeof(output));
-
-    const int32_t buf_size =
-        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
-    ctx.buf = malloc(buf_size);
-
-    result = arm_depthwise_conv_wrapper_s8(&ctx,
-                                           &dw_conv_params,
-                                           &quant_params,
-                                           &input_dims,
-                                           input_data,
-                                           &filter_dims,
-                                           basic_weights,
-                                           &bias_dims,
-                                           bias_data,
-                                           &output_dims,
-                                           output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, buf_size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, basic_output_ref, BASIC_DST_SIZE));
-}
-
-void stride2pad1_arm_depthwise_conv_s8(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[STRIDE2PAD1_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims = {};
-    cmsis_nn_dims output_dims;
-
-    const int32_t output_ref_size = STRIDE2PAD1_DST_SIZE;
-    const int32_t *bias_data = get_bias_address(stride2pad1_biases, STRIDE2PAD1_OUT_CH);
-    const int8_t *kernel_data = stride2pad1_weights;
-    const int8_t *input_data = stride2pad1_input;
-
-    input_dims.n = STRIDE2PAD1_INPUT_BATCHES;
-    input_dims.w = STRIDE2PAD1_INPUT_W;
-    input_dims.h = STRIDE2PAD1_INPUT_H;
-    input_dims.c = STRIDE2PAD1_IN_CH;
-    filter_dims.w = STRIDE2PAD1_FILTER_X;
-    filter_dims.h = STRIDE2PAD1_FILTER_Y;
-    output_dims.w = STRIDE2PAD1_OUTPUT_W;
-    output_dims.h = STRIDE2PAD1_OUTPUT_H;
-    output_dims.c = STRIDE2PAD1_OUT_CH;
-
-    dw_conv_params.padding.w = STRIDE2PAD1_PAD_X;
-    dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
-    dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
-    dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
-    dw_conv_params.dilation.w = STRIDE2PAD1_DILATION_X;
-    dw_conv_params.dilation.h = STRIDE2PAD1_DILATION_Y;
-
-    dw_conv_params.ch_mult = 1;
-
-    dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
-    dw_conv_params.output_offset = STRIDE2PAD1_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = STRIDE2PAD1_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = STRIDE2PAD1_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)stride2pad1_output_mult;
-    quant_params.shift = (int32_t *)stride2pad1_output_shift;
-
-    ctx.buf = NULL;
-    ctx.size = 0;
-
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx,
-                                                       &dw_conv_params,
-                                                       &quant_params,
-                                                       &input_dims,
-                                                       input_data,
-                                                       &filter_dims,
-                                                       kernel_data,
-                                                       &bias_dims,
-                                                       bias_data,
-                                                       &output_dims,
-                                                       output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, stride2pad1_output_ref, output_ref_size));
-    memset(output, 0, sizeof(output));
-
-    const int32_t buf_size =
-        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
-    ctx.buf = malloc(buf_size);
-    ctx.size = 0;
-    result = arm_depthwise_conv_wrapper_s8(&ctx,
-                                           &dw_conv_params,
-                                           &quant_params,
-                                           &input_dims,
-                                           input_data,
-                                           &filter_dims,
-                                           kernel_data,
-                                           &bias_dims,
-                                           bias_data,
-                                           &output_dims,
-                                           output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, buf_size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, stride2pad1_output_ref, output_ref_size));
-}
-
 void depthwise_2_arm_depthwise_conv_s8(void)
 {
     const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
@@ -300,8 +96,10 @@ void depthwise_2_arm_depthwise_conv_s8(void)
 
     const int32_t buf_size =
         arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
     ctx.buf = malloc(buf_size);
-    ctx.size = 0;
+    ctx.size = buf_size;
 
     result = arm_depthwise_conv_wrapper_s8(&ctx,
                                            &dw_conv_params,
@@ -321,98 +119,7 @@ void depthwise_2_arm_depthwise_conv_s8(void)
         free(ctx.buf);
     }
     TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_2_output_ref, DEPTHWISE_2_DST_SIZE));
-}
-
-void depthwise_out_activation_arm_depthwise_conv_s8(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[DEPTHWISE_OUT_ACTIVATION_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims = {};
-    cmsis_nn_dims output_dims;
-
-    const int32_t output_ref_size = DEPTHWISE_OUT_ACTIVATION_DST_SIZE;
-    const int32_t *bias_data = get_bias_address(depthwise_out_activation_biases, DEPTHWISE_OUT_ACTIVATION_OUT_CH);
-    const int8_t *kernel_data = depthwise_out_activation_weights;
-    const int8_t *input_data = depthwise_out_activation_input;
-
-    input_dims.n = DEPTHWISE_OUT_ACTIVATION_INPUT_BATCHES;
-    input_dims.w = DEPTHWISE_OUT_ACTIVATION_INPUT_W;
-    input_dims.h = DEPTHWISE_OUT_ACTIVATION_INPUT_H;
-    input_dims.c = DEPTHWISE_OUT_ACTIVATION_IN_CH;
-    filter_dims.w = DEPTHWISE_OUT_ACTIVATION_FILTER_X;
-    filter_dims.h = DEPTHWISE_OUT_ACTIVATION_FILTER_Y;
-    output_dims.w = DEPTHWISE_OUT_ACTIVATION_OUTPUT_W;
-    output_dims.h = DEPTHWISE_OUT_ACTIVATION_OUTPUT_H;
-    output_dims.c = DEPTHWISE_OUT_ACTIVATION_OUT_CH;
-
-    dw_conv_params.padding.w = DEPTHWISE_OUT_ACTIVATION_PAD_X;
-    dw_conv_params.padding.h = DEPTHWISE_OUT_ACTIVATION_PAD_Y;
-    dw_conv_params.stride.w = DEPTHWISE_OUT_ACTIVATION_STRIDE_X;
-    dw_conv_params.stride.h = DEPTHWISE_OUT_ACTIVATION_STRIDE_Y;
-    dw_conv_params.ch_mult = DEPTHWISE_OUT_ACTIVATION_CH_MULT;
-    dw_conv_params.dilation.w = DEPTHWISE_OUT_ACTIVATION_DILATION_X;
-    dw_conv_params.dilation.h = DEPTHWISE_OUT_ACTIVATION_DILATION_Y;
-
-    dw_conv_params.input_offset = DEPTHWISE_OUT_ACTIVATION_INPUT_OFFSET;
-    dw_conv_params.output_offset = DEPTHWISE_OUT_ACTIVATION_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = DEPTHWISE_OUT_ACTIVATION_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = DEPTHWISE_OUT_ACTIVATION_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)depthwise_out_activation_output_mult;
-    quant_params.shift = (int32_t *)depthwise_out_activation_output_shift;
-
-    ctx.buf = NULL;
-    ctx.size = 0;
-
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx,
-                                                       &dw_conv_params,
-                                                       &quant_params,
-                                                       &input_dims,
-                                                       input_data,
-                                                       &filter_dims,
-                                                       kernel_data,
-                                                       &bias_dims,
-                                                       bias_data,
-                                                       &output_dims,
-                                                       output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_out_activation_output_ref, output_ref_size));
-    memset(output, 0, sizeof(output));
-
-    ctx.buf = NULL;
-    ctx.size = 0;
-
-    result = arm_depthwise_conv_s8(&ctx,
-                                   &dw_conv_params,
-                                   &quant_params,
-                                   &input_dims,
-                                   input_data,
-                                   &filter_dims,
-                                   kernel_data,
-                                   &bias_dims,
-                                   bias_data,
-                                   &output_dims,
-                                   output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_out_activation_output_ref, DEPTHWISE_OUT_ACTIVATION_DST_SIZE));
+    TEST_ASSERT_TRUE(validate(output, depthwise_2_output_ref, output_ref_size));
 }
 
 void depthwise_mult_batches_arm_depthwise_conv_s8(void)
@@ -481,97 +188,34 @@ void depthwise_mult_batches_arm_depthwise_conv_s8(void)
     }
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_mult_batches_output_ref, output_ref_size));
-    memset(output, 0, sizeof(output));
 
-    ctx.buf = NULL;
-    ctx.size = 0;
-
-    result = arm_depthwise_conv_s8(&ctx,
-                                   &dw_conv_params,
-                                   &quant_params,
-                                   &input_dims,
-                                   input_data,
-                                   &filter_dims,
-                                   kernel_data,
-                                   &bias_dims,
-                                   bias_data,
-                                   &output_dims,
-                                   output);
-
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_mult_batches_output_ref, DEPTHWISE_MULT_BATCHES_DST_SIZE));
-}
-
-void depthwise_null_bias_0_arm_depthwise_conv_s8(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[DEPTHWISE_NULL_BIAS_0_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims = {};
-    cmsis_nn_dims output_dims;
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
 
-    const int32_t *bias_data = get_bias_address(depthwise_null_bias_0_biases, DEPTHWISE_NULL_BIAS_0_OUT_CH);
-    const int8_t *kernel_data = depthwise_null_bias_0_weights;
-    const int8_t *input_data = depthwise_null_bias_0_input;
-
-    input_dims.n = DEPTHWISE_NULL_BIAS_0_INPUT_BATCHES;
-    input_dims.w = DEPTHWISE_NULL_BIAS_0_INPUT_W;
-    input_dims.h = DEPTHWISE_NULL_BIAS_0_INPUT_H;
-    input_dims.c = DEPTHWISE_NULL_BIAS_0_IN_CH;
-    filter_dims.w = DEPTHWISE_NULL_BIAS_0_FILTER_X;
-    filter_dims.h = DEPTHWISE_NULL_BIAS_0_FILTER_Y;
-    output_dims.w = DEPTHWISE_NULL_BIAS_0_OUTPUT_W;
-    output_dims.h = DEPTHWISE_NULL_BIAS_0_OUTPUT_H;
-    output_dims.c = DEPTHWISE_NULL_BIAS_0_OUT_CH;
-
-    dw_conv_params.padding.w = DEPTHWISE_NULL_BIAS_0_PAD_X;
-    dw_conv_params.padding.h = DEPTHWISE_NULL_BIAS_0_PAD_Y;
-    dw_conv_params.stride.w = DEPTHWISE_NULL_BIAS_0_STRIDE_X;
-    dw_conv_params.stride.h = DEPTHWISE_NULL_BIAS_0_STRIDE_Y;
-    dw_conv_params.dilation.w = DEPTHWISE_NULL_BIAS_0_DILATION_X;
-    dw_conv_params.dilation.h = DEPTHWISE_NULL_BIAS_0_DILATION_Y;
-
-    dw_conv_params.ch_mult = DEPTHWISE_NULL_BIAS_0_CH_MULT;
-
-    dw_conv_params.input_offset = DEPTHWISE_NULL_BIAS_0_INPUT_OFFSET;
-    dw_conv_params.output_offset = DEPTHWISE_NULL_BIAS_0_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = DEPTHWISE_NULL_BIAS_0_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = DEPTHWISE_NULL_BIAS_0_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)depthwise_null_bias_0_output_mult;
-    quant_params.shift = (int32_t *)depthwise_null_bias_0_output_shift;
+    TEST_ASSERT_EQUAL(buf_size, 0);
 
-    ctx.buf = NULL;
-    ctx.size = 0;
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
 
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx,
-                                                       &dw_conv_params,
-                                                       &quant_params,
-                                                       &input_dims,
-                                                       input_data,
-                                                       &filter_dims,
-                                                       kernel_data,
-                                                       &bias_dims,
-                                                       bias_data,
-                                                       &output_dims,
-                                                       output);
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
 
     if (ctx.buf)
     {
-        memset(ctx.buf, 0, ctx.size);
+        memset(ctx.buf, 0, buf_size);
         free(ctx.buf);
     }
     TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_0_output_ref, DEPTHWISE_NULL_BIAS_0_DST_SIZE));
+    TEST_ASSERT_TRUE(validate(output, depthwise_mult_batches_output_ref, output_ref_size));
 }
 
 void depthwise_null_bias_1_arm_depthwise_conv_s8(void)
@@ -639,6 +283,34 @@ void depthwise_null_bias_1_arm_depthwise_conv_s8(void)
     }
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_1_output_ref, DEPTHWISE_NULL_BIAS_1_DST_SIZE));
+
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_1_output_ref, DEPTHWISE_NULL_BIAS_1_DST_SIZE));
 }
 
 void depthwise_dilation_arm_depthwise_conv_s8(void)
@@ -711,7 +383,11 @@ void depthwise_dilation_arm_depthwise_conv_s8(void)
 
     const int32_t buf_size =
         arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
-    TEST_ASSERT_EQUAL(0, buf_size);
+    TEST_ASSERT_EQUAL(buf_size, 0);
+
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
     result = arm_depthwise_conv_wrapper_s8(&ctx,
                                            &dw_conv_params,
                                            &quant_params,
@@ -726,70 +402,3 @@ void depthwise_dilation_arm_depthwise_conv_s8(void)
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_dilation_output_ref, output_ref_size));
 }
-
-void depthwise_x_stride_arm_depthwise_conv_s8(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[DEPTHWISE_X_STRIDE_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims;
-    cmsis_nn_dims output_dims;
-
-    const int32_t *bias_data = get_bias_address(depthwise_x_stride_biases, DEPTHWISE_X_STRIDE_IN_CH);
-    const int8_t *kernel_data = depthwise_x_stride_weights;
-    const int8_t *input_data = depthwise_x_stride_input;
-
-    input_dims.n = DEPTHWISE_X_STRIDE_INPUT_BATCHES;
-    input_dims.w = DEPTHWISE_X_STRIDE_INPUT_W;
-    input_dims.h = DEPTHWISE_X_STRIDE_INPUT_H;
-    input_dims.c = DEPTHWISE_X_STRIDE_IN_CH;
-    filter_dims.w = DEPTHWISE_X_STRIDE_FILTER_X;
-    filter_dims.h = DEPTHWISE_X_STRIDE_FILTER_Y;
-    output_dims.w = DEPTHWISE_X_STRIDE_OUTPUT_W;
-    output_dims.h = DEPTHWISE_X_STRIDE_OUTPUT_H;
-    output_dims.c = DEPTHWISE_X_STRIDE_OUT_CH;
-
-    dw_conv_params.padding.w = DEPTHWISE_X_STRIDE_PAD_X;
-    dw_conv_params.padding.h = DEPTHWISE_X_STRIDE_PAD_Y;
-    dw_conv_params.stride.w = DEPTHWISE_X_STRIDE_STRIDE_X;
-    dw_conv_params.stride.h = DEPTHWISE_X_STRIDE_STRIDE_Y;
-    dw_conv_params.dilation.w = DEPTHWISE_X_STRIDE_DILATION_X;
-    dw_conv_params.dilation.h = DEPTHWISE_X_STRIDE_DILATION_Y;
-
-    dw_conv_params.ch_mult = 1;
-
-    dw_conv_params.input_offset = DEPTHWISE_X_STRIDE_INPUT_OFFSET;
-    dw_conv_params.output_offset = DEPTHWISE_X_STRIDE_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = DEPTHWISE_X_STRIDE_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = DEPTHWISE_X_STRIDE_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)depthwise_x_stride_output_mult;
-    quant_params.shift = (int32_t *)depthwise_x_stride_output_shift;
-
-    ctx.buf = NULL;
-    ctx.size = arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
-    ctx.buf = malloc(ctx.size);
-
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx,
-                                                       &dw_conv_params,
-                                                       &quant_params,
-                                                       &input_dims,
-                                                       input_data,
-                                                       &filter_dims,
-                                                       kernel_data,
-                                                       &bias_dims,
-                                                       bias_data,
-                                                       &output_dims,
-                                                       output);
-    if (ctx.buf)
-    {
-        memset(ctx.buf, 0, ctx.size);
-        free(ctx.buf);
-    }
-    TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, depthwise_x_stride_output_ref, DEPTHWISE_X_STRIDE_DST_SIZE));
-}

+ 9 - 2
Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/Unity/unity_test_arm_depthwise_conv_s8_opt.c

@@ -46,8 +46,15 @@ void tearDown(void) {}
 
 void test_basic_arm_depthwise_conv_s8_opt(void) { basic_arm_depthwise_conv_s8_opt(); }
 
-void test_stride2pad1_arm_depthwise_conv_s8_opt(void) { stride2pad1_arm_depthwise_conv_s8_opt(); }
-
 void test_depthwise_eq_in_out_ch_arm_depthwise_conv_s8_opt(void) { depthwise_eq_in_out_ch_arm_depthwise_conv_s8_opt(); }
 
 void test_depthwise_sub_block_arm_depthwise_conv_s8_opt(void) { depthwise_sub_block_arm_depthwise_conv_s8_opt(); }
+
+/* Test entry point: forwards to depthwise_out_activation_arm_depthwise_conv_s8_opt(). */
+void test_depthwise_out_activation_arm_depthwise_conv_s8_opt(void)
+{
+    depthwise_out_activation_arm_depthwise_conv_s8_opt();
+}
+
+/* Test entry point: forwards to depthwise_null_bias_0_arm_depthwise_conv_s8_opt(). */
+void test_depthwise_null_bias_0_arm_depthwise_conv_s8_opt(void) { depthwise_null_bias_0_arm_depthwise_conv_s8_opt(); }
+
+/* Test entry point: forwards to depthwise_x_stride_arm_depthwise_conv_s8_opt(). */
+void test_depthwise_x_stride_arm_depthwise_conv_s8_opt(void) { depthwise_x_stride_arm_depthwise_conv_s8_opt(); }

+ 401 - 73
Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8_opt/test_arm_depthwise_conv_s8_opt.c

@@ -21,26 +21,14 @@
 #include <unity.h>
 
 #include "../TestData/basic/test_data.h"
-#include "../TestData/depthwise_2/test_data.h"
 #include "../TestData/depthwise_eq_in_out_ch/test_data.h"
+#include "../TestData/depthwise_null_bias_0/test_data.h"
+#include "../TestData/depthwise_out_activation/test_data.h"
 #include "../TestData/depthwise_sub_block/test_data.h"
-#include "../TestData/stride2pad1/test_data.h"
+#include "../TestData/depthwise_x_stride/test_data.h"
+#include "../Utils/utils.h"
 #include "../Utils/validate.h"
 
-const int32_t *get_bias_address(const int32_t *bias, int32_t size)
-{
-    const int32_t *return_bias = NULL;
-    for (int i = 0; i < size; i++)
-    {
-        if (bias[i] != 0)
-        {
-            return_bias = bias;
-            break;
-        }
-    }
-    return return_bias;
-}
-
 void basic_arm_depthwise_conv_s8_opt(void)
 {
     const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
@@ -85,6 +73,13 @@ void basic_arm_depthwise_conv_s8_opt(void)
     quant_params.shift = (int32_t *)basic_output_shift;
 
     ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    TEST_ASSERT_EQUAL(ctx.size, 0);
+#endif
+
     ctx.buf = malloc(ctx.size);
 
     arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
@@ -107,73 +102,33 @@ void basic_arm_depthwise_conv_s8_opt(void)
     }
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, basic_output_ref, BASIC_DST_SIZE));
-}
-
-void stride2pad1_arm_depthwise_conv_s8_opt(void)
-{
-    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
-    int8_t output[STRIDE2PAD1_DST_SIZE] = {0};
-
-    cmsis_nn_context ctx;
-    cmsis_nn_dw_conv_params dw_conv_params;
-    cmsis_nn_per_channel_quant_params quant_params;
-    cmsis_nn_dims input_dims;
-    cmsis_nn_dims filter_dims;
-    cmsis_nn_dims bias_dims;
-    cmsis_nn_dims output_dims;
 
-    const int32_t *bias_data = stride2pad1_biases;
-    const int8_t *kernel_data = stride2pad1_weights;
-    const int8_t *input_data = stride2pad1_input;
-
-    input_dims.n = STRIDE2PAD1_INPUT_BATCHES;
-    input_dims.w = STRIDE2PAD1_INPUT_W;
-    input_dims.h = STRIDE2PAD1_INPUT_H;
-    input_dims.c = STRIDE2PAD1_IN_CH;
-    filter_dims.w = STRIDE2PAD1_FILTER_X;
-    filter_dims.h = STRIDE2PAD1_FILTER_Y;
-    output_dims.w = STRIDE2PAD1_OUTPUT_W;
-    output_dims.h = STRIDE2PAD1_OUTPUT_H;
-    output_dims.c = STRIDE2PAD1_OUT_CH;
-
-    dw_conv_params.padding.w = STRIDE2PAD1_PAD_X;
-    dw_conv_params.padding.h = STRIDE2PAD1_PAD_Y;
-    dw_conv_params.stride.w = STRIDE2PAD1_STRIDE_X;
-    dw_conv_params.stride.h = STRIDE2PAD1_STRIDE_Y;
-    dw_conv_params.dilation.w = STRIDE2PAD1_DILATION_X;
-    dw_conv_params.dilation.h = STRIDE2PAD1_DILATION_Y;
+    const int32_t wrapper_buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
 
-    dw_conv_params.ch_mult = 1;
+    TEST_ASSERT_EQUAL(wrapper_buf_size, ctx.size);
 
-    dw_conv_params.input_offset = STRIDE2PAD1_INPUT_OFFSET;
-    dw_conv_params.output_offset = STRIDE2PAD1_OUTPUT_OFFSET;
-    dw_conv_params.activation.min = STRIDE2PAD1_OUT_ACTIVATION_MIN;
-    dw_conv_params.activation.max = STRIDE2PAD1_OUT_ACTIVATION_MAX;
-    quant_params.multiplier = (int32_t *)stride2pad1_output_mult;
-    quant_params.shift = (int32_t *)stride2pad1_output_shift;
+    ctx.buf = malloc(wrapper_buf_size);
 
-    ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
-    ctx.buf = malloc(ctx.size);
-
-    arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
-                                                           &dw_conv_params,
-                                                           &quant_params,
-                                                           &input_dims,
-                                                           input_data,
-                                                           &filter_dims,
-                                                           kernel_data,
-                                                           &bias_dims,
-                                                           bias_data,
-                                                           &output_dims,
-                                                           output);
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
 
     if (ctx.buf)
     {
-        memset(ctx.buf, 0, ctx.size);
+        memset(ctx.buf, 0, wrapper_buf_size);
         free(ctx.buf);
     }
     TEST_ASSERT_EQUAL(expected, result);
-    TEST_ASSERT_TRUE(validate(output, stride2pad1_output_ref, STRIDE2PAD1_DST_SIZE));
+    TEST_ASSERT_TRUE(validate(output, basic_output_ref, BASIC_DST_SIZE));
 }
 
 void depthwise_eq_in_out_ch_arm_depthwise_conv_s8_opt(void)
@@ -220,6 +175,13 @@ void depthwise_eq_in_out_ch_arm_depthwise_conv_s8_opt(void)
     quant_params.shift = (int32_t *)depthwise_eq_in_out_ch_output_shift;
 
     ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    TEST_ASSERT_EQUAL(ctx.size, 0);
+#endif
+
     ctx.buf = malloc(ctx.size);
 
     arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
@@ -241,6 +203,33 @@ void depthwise_eq_in_out_ch_arm_depthwise_conv_s8_opt(void)
     }
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_eq_in_out_ch_output_ref, DEPTHWISE_EQ_IN_OUT_CH_DST_SIZE));
+
+    const int32_t wrapper_buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(wrapper_buf_size, ctx.size);
+
+    ctx.buf = malloc(wrapper_buf_size);
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, wrapper_buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_eq_in_out_ch_output_ref, DEPTHWISE_EQ_IN_OUT_CH_DST_SIZE));
 }
 
 void depthwise_sub_block_arm_depthwise_conv_s8_opt(void)
@@ -287,6 +276,13 @@ void depthwise_sub_block_arm_depthwise_conv_s8_opt(void)
     quant_params.shift = (int32_t *)depthwise_sub_block_output_shift;
 
     ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    TEST_ASSERT_EQUAL(ctx.size, 0);
+#endif
+
     ctx.buf = malloc(ctx.size);
 
     arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
@@ -307,4 +303,336 @@ void depthwise_sub_block_arm_depthwise_conv_s8_opt(void)
     }
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate(output, depthwise_sub_block_output_ref, DEPTHWISE_SUB_BLOCK_DST_SIZE));
+
+    const int32_t wrapper_buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(wrapper_buf_size, ctx.size);
+
+    ctx.buf = malloc(wrapper_buf_size);
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, wrapper_buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_sub_block_output_ref, DEPTHWISE_SUB_BLOCK_DST_SIZE));
+}
+
+/*
+ * Runs the depthwise_out_activation data set twice and validates the output
+ * against depthwise_out_activation_output_ref each time:
+ *   1. directly through arm_depthwise_conv_s8_opt(), with a scratch buffer
+ *      sized by arm_depthwise_conv_s8_opt_get_buffer_size();
+ *   2. through arm_depthwise_conv_wrapper_s8(), with a scratch buffer sized by
+ *      arm_depthwise_conv_wrapper_s8_get_buffer_size().
+ * The wrapper's reported buffer size is required to equal the size reported
+ * for the optimized kernel.
+ */
+void depthwise_out_activation_arm_depthwise_conv_s8_opt(void)
+{
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int8_t output[DEPTHWISE_OUT_ACTIVATION_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_dw_conv_params dw_conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims = {};
+    cmsis_nn_dims output_dims;
+
+    const int32_t output_ref_size = DEPTHWISE_OUT_ACTIVATION_DST_SIZE;
+    // NOTE(review): get_bias_address() is defined outside this function
+    // (presumably ../Utils/utils.h); confirm whether it can return NULL here.
+    const int32_t *bias_data = get_bias_address(depthwise_out_activation_biases, DEPTHWISE_OUT_ACTIVATION_OUT_CH);
+    const int8_t *kernel_data = depthwise_out_activation_weights;
+    const int8_t *input_data = depthwise_out_activation_input;
+
+    input_dims.n = DEPTHWISE_OUT_ACTIVATION_INPUT_BATCHES;
+    input_dims.w = DEPTHWISE_OUT_ACTIVATION_INPUT_W;
+    input_dims.h = DEPTHWISE_OUT_ACTIVATION_INPUT_H;
+    input_dims.c = DEPTHWISE_OUT_ACTIVATION_IN_CH;
+    filter_dims.w = DEPTHWISE_OUT_ACTIVATION_FILTER_X;
+    filter_dims.h = DEPTHWISE_OUT_ACTIVATION_FILTER_Y;
+    output_dims.w = DEPTHWISE_OUT_ACTIVATION_OUTPUT_W;
+    output_dims.h = DEPTHWISE_OUT_ACTIVATION_OUTPUT_H;
+    output_dims.c = DEPTHWISE_OUT_ACTIVATION_OUT_CH;
+
+    dw_conv_params.padding.w = DEPTHWISE_OUT_ACTIVATION_PAD_X;
+    dw_conv_params.padding.h = DEPTHWISE_OUT_ACTIVATION_PAD_Y;
+    dw_conv_params.stride.w = DEPTHWISE_OUT_ACTIVATION_STRIDE_X;
+    dw_conv_params.stride.h = DEPTHWISE_OUT_ACTIVATION_STRIDE_Y;
+    dw_conv_params.ch_mult = DEPTHWISE_OUT_ACTIVATION_CH_MULT;
+    dw_conv_params.dilation.w = DEPTHWISE_OUT_ACTIVATION_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_OUT_ACTIVATION_DILATION_Y;
+
+    dw_conv_params.input_offset = DEPTHWISE_OUT_ACTIVATION_INPUT_OFFSET;
+    dw_conv_params.output_offset = DEPTHWISE_OUT_ACTIVATION_OUTPUT_OFFSET;
+    dw_conv_params.activation.min = DEPTHWISE_OUT_ACTIVATION_OUT_ACTIVATION_MIN;
+    dw_conv_params.activation.max = DEPTHWISE_OUT_ACTIVATION_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)depthwise_out_activation_output_mult;
+    quant_params.shift = (int32_t *)depthwise_out_activation_output_shift;
+
+    // First pass: call the optimized kernel directly.
+    ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+    // This test expects a non-empty scratch buffer only for ARM_MATH_DSP builds.
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    // Unity argument order is (expected, actual).
+    TEST_ASSERT_EQUAL(0, ctx.size);
+#endif
+
+    ctx.buf = malloc(ctx.size);
+
+    arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
+                                                           &dw_conv_params,
+                                                           &quant_params,
+                                                           &input_dims,
+                                                           input_data,
+                                                           &filter_dims,
+                                                           kernel_data,
+                                                           &bias_dims,
+                                                           bias_data,
+                                                           &output_dims,
+                                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, ctx.size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_out_activation_output_ref, output_ref_size));
+    // Clear the result so the wrapper pass below cannot validate stale data.
+    memset(output, 0, sizeof(output));
+
+    // Second pass: same parameters through the wrapper API.
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    // ctx.size still holds the optimized kernel's size and acts as the expected value.
+    TEST_ASSERT_EQUAL(ctx.size, buf_size);
+
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_out_activation_output_ref, output_ref_size));
+}
+
+/*
+ * Runs the depthwise_null_bias_0 data set twice and validates the output
+ * against depthwise_null_bias_0_output_ref each time:
+ *   1. directly through arm_depthwise_conv_s8_opt(), with a scratch buffer
+ *      sized by arm_depthwise_conv_s8_opt_get_buffer_size();
+ *   2. through arm_depthwise_conv_wrapper_s8(), with a scratch buffer sized by
+ *      arm_depthwise_conv_wrapper_s8_get_buffer_size().
+ * The wrapper's reported buffer size is required to equal the size reported
+ * for the optimized kernel. The output array is zeroed between the two passes
+ * so the second validation only succeeds if the wrapper produced the result.
+ */
+void depthwise_null_bias_0_arm_depthwise_conv_s8_opt(void)
+{
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int8_t output[DEPTHWISE_NULL_BIAS_0_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_dw_conv_params dw_conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims = {};
+    cmsis_nn_dims output_dims;
+
+    // NOTE(review): get_bias_address() is defined outside this function
+    // (presumably ../Utils/utils.h); confirm whether it returns NULL for this
+    // data set.
+    const int32_t *bias_data = get_bias_address(depthwise_null_bias_0_biases, DEPTHWISE_NULL_BIAS_0_OUT_CH);
+    const int8_t *kernel_data = depthwise_null_bias_0_weights;
+    const int8_t *input_data = depthwise_null_bias_0_input;
+
+    input_dims.n = DEPTHWISE_NULL_BIAS_0_INPUT_BATCHES;
+    input_dims.w = DEPTHWISE_NULL_BIAS_0_INPUT_W;
+    input_dims.h = DEPTHWISE_NULL_BIAS_0_INPUT_H;
+    input_dims.c = DEPTHWISE_NULL_BIAS_0_IN_CH;
+    filter_dims.w = DEPTHWISE_NULL_BIAS_0_FILTER_X;
+    filter_dims.h = DEPTHWISE_NULL_BIAS_0_FILTER_Y;
+    output_dims.w = DEPTHWISE_NULL_BIAS_0_OUTPUT_W;
+    output_dims.h = DEPTHWISE_NULL_BIAS_0_OUTPUT_H;
+    output_dims.c = DEPTHWISE_NULL_BIAS_0_OUT_CH;
+
+    dw_conv_params.padding.w = DEPTHWISE_NULL_BIAS_0_PAD_X;
+    dw_conv_params.padding.h = DEPTHWISE_NULL_BIAS_0_PAD_Y;
+    dw_conv_params.stride.w = DEPTHWISE_NULL_BIAS_0_STRIDE_X;
+    dw_conv_params.stride.h = DEPTHWISE_NULL_BIAS_0_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_NULL_BIAS_0_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_NULL_BIAS_0_DILATION_Y;
+
+    dw_conv_params.ch_mult = DEPTHWISE_NULL_BIAS_0_CH_MULT;
+
+    dw_conv_params.input_offset = DEPTHWISE_NULL_BIAS_0_INPUT_OFFSET;
+    dw_conv_params.output_offset = DEPTHWISE_NULL_BIAS_0_OUTPUT_OFFSET;
+    dw_conv_params.activation.min = DEPTHWISE_NULL_BIAS_0_OUT_ACTIVATION_MIN;
+    dw_conv_params.activation.max = DEPTHWISE_NULL_BIAS_0_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)depthwise_null_bias_0_output_mult;
+    quant_params.shift = (int32_t *)depthwise_null_bias_0_output_shift;
+
+    // First pass: call the optimized kernel directly.
+    ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+    // This test expects a non-empty scratch buffer only for ARM_MATH_DSP builds.
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    // Unity argument order is (expected, actual).
+    TEST_ASSERT_EQUAL(0, ctx.size);
+#endif
+
+    ctx.buf = malloc(ctx.size);
+
+    arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
+                                                           &dw_conv_params,
+                                                           &quant_params,
+                                                           &input_dims,
+                                                           input_data,
+                                                           &filter_dims,
+                                                           kernel_data,
+                                                           &bias_dims,
+                                                           bias_data,
+                                                           &output_dims,
+                                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, ctx.size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_0_output_ref, DEPTHWISE_NULL_BIAS_0_DST_SIZE));
+    // Clear the result of the first pass; otherwise the wrapper pass below
+    // would validate successfully even if it never wrote to output.
+    memset(output, 0, sizeof(output));
+
+    // Second pass: same parameters through the wrapper API.
+    const int32_t buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    // ctx.size still holds the optimized kernel's size and acts as the expected value.
+    TEST_ASSERT_EQUAL(ctx.size, buf_size);
+
+    ctx.buf = malloc(buf_size);
+    ctx.size = buf_size;
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_null_bias_0_output_ref, DEPTHWISE_NULL_BIAS_0_DST_SIZE));
+}
+
+void depthwise_x_stride_arm_depthwise_conv_s8_opt(void)
+{
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int8_t output[DEPTHWISE_X_STRIDE_DST_SIZE] = {0};
+
+    cmsis_nn_context ctx;
+    cmsis_nn_dw_conv_params dw_conv_params;
+    cmsis_nn_per_channel_quant_params quant_params;
+    cmsis_nn_dims input_dims;
+    cmsis_nn_dims filter_dims;
+    cmsis_nn_dims bias_dims;
+    cmsis_nn_dims output_dims;
+
+    const int32_t *bias_data = get_bias_address(depthwise_x_stride_biases, DEPTHWISE_X_STRIDE_IN_CH);
+    const int8_t *kernel_data = depthwise_x_stride_weights;
+    const int8_t *input_data = depthwise_x_stride_input;
+
+    input_dims.n = DEPTHWISE_X_STRIDE_INPUT_BATCHES;
+    input_dims.w = DEPTHWISE_X_STRIDE_INPUT_W;
+    input_dims.h = DEPTHWISE_X_STRIDE_INPUT_H;
+    input_dims.c = DEPTHWISE_X_STRIDE_IN_CH;
+    filter_dims.w = DEPTHWISE_X_STRIDE_FILTER_X;
+    filter_dims.h = DEPTHWISE_X_STRIDE_FILTER_Y;
+    output_dims.w = DEPTHWISE_X_STRIDE_OUTPUT_W;
+    output_dims.h = DEPTHWISE_X_STRIDE_OUTPUT_H;
+    output_dims.c = DEPTHWISE_X_STRIDE_OUT_CH;
+
+    dw_conv_params.padding.w = DEPTHWISE_X_STRIDE_PAD_X;
+    dw_conv_params.padding.h = DEPTHWISE_X_STRIDE_PAD_Y;
+    dw_conv_params.stride.w = DEPTHWISE_X_STRIDE_STRIDE_X;
+    dw_conv_params.stride.h = DEPTHWISE_X_STRIDE_STRIDE_Y;
+    dw_conv_params.dilation.w = DEPTHWISE_X_STRIDE_DILATION_X;
+    dw_conv_params.dilation.h = DEPTHWISE_X_STRIDE_DILATION_Y;
+
+    dw_conv_params.ch_mult = 1;
+
+    dw_conv_params.input_offset = DEPTHWISE_X_STRIDE_INPUT_OFFSET;
+    dw_conv_params.output_offset = DEPTHWISE_X_STRIDE_OUTPUT_OFFSET;
+    dw_conv_params.activation.min = DEPTHWISE_X_STRIDE_OUT_ACTIVATION_MIN;
+    dw_conv_params.activation.max = DEPTHWISE_X_STRIDE_OUT_ACTIVATION_MAX;
+    quant_params.multiplier = (int32_t *)depthwise_x_stride_output_mult;
+    quant_params.shift = (int32_t *)depthwise_x_stride_output_shift;
+
+    ctx.size = arm_depthwise_conv_s8_opt_get_buffer_size(&input_dims, &filter_dims);
+
+#if defined(ARM_MATH_DSP)
+    TEST_ASSERT_TRUE(ctx.size > 0);
+#else
+    TEST_ASSERT_EQUAL(ctx.size, 0);
+#endif
+
+    ctx.buf = malloc(ctx.size);
+
+    arm_cmsis_nn_status result = arm_depthwise_conv_s8_opt(&ctx,
+                                                           &dw_conv_params,
+                                                           &quant_params,
+                                                           &input_dims,
+                                                           input_data,
+                                                           &filter_dims,
+                                                           kernel_data,
+                                                           &bias_dims,
+                                                           bias_data,
+                                                           &output_dims,
+                                                           output);
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, ctx.size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_x_stride_output_ref, DEPTHWISE_X_STRIDE_DST_SIZE));
+
+    const int32_t wrapper_buf_size =
+        arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    TEST_ASSERT_EQUAL(wrapper_buf_size, ctx.size);
+
+    ctx.buf = malloc(wrapper_buf_size);
+
+    result = arm_depthwise_conv_wrapper_s8(&ctx,
+                                           &dw_conv_params,
+                                           &quant_params,
+                                           &input_dims,
+                                           input_data,
+                                           &filter_dims,
+                                           kernel_data,
+                                           &bias_dims,
+                                           bias_data,
+                                           &output_dims,
+                                           output);
+
+    if (ctx.buf)
+    {
+        memset(ctx.buf, 0, wrapper_buf_size);
+        free(ctx.buf);
+    }
+    TEST_ASSERT_EQUAL(expected, result);
+    TEST_ASSERT_TRUE(validate(output, depthwise_x_stride_output_ref, DEPTHWISE_X_STRIDE_DST_SIZE));
 }