6 лет назад · 5c093c0d85
--- a/CMSIS/DSP/Testing/preprocess.py
+++ b/CMSIS/DSP/Testing/preprocess.py
@@ -1,18 +1,18 @@
 
				-import argparse

			
 
				-import TestScripts.NewParser as parse

			
 
				-import pickle

			
 
				-

			
 
				-parser = argparse.ArgumentParser(description='Parse test description')

			
 
				-

			
 
				-parser.add_argument('-f', nargs='?',type = str, default=None, help="Test description file path")

			
 
				-

			
 
				-parser.add_argument('-o', nargs='?',type = str, default="Output.pickle", help="output file for parsed description")

			
 
				-

			
 
				-args = parser.parse_args()

			
 
				-

			
 
				-if args.f is not None:

			
 
				-    p = parse.Parser()

			
 
				-    # Parse the test description file

			
 
				-    root = p.parse(args.f)

			
 
				-    with open(args.o,"wb") as output:

			
 
				-         pickle.dump(root, output)

			
 
				+import argparse
			
 
				+import TestScripts.NewParser as parse
			
 
				+import pickle
			
 
				+
			
 
				+parser = argparse.ArgumentParser(description='Parse test description')
			
 
				+
			
 
				+parser.add_argument('-f', nargs='?',type = str, default=None, help="Test description file path")
			
 
				+
			
 
				+parser.add_argument('-o', nargs='?',type = str, default="Output.pickle", help="output file for parsed description")
			
 
				+
			
 
				+args = parser.parse_args()
			
 
				+
			
 
				+if args.f is not None:  # -f defaults to None; without it the script does nothing
			
 
				+    p = parse.Parser()
			
 
				+    # Parse the test description file given with -f
			
 
				+    root = p.parse(args.f)
			
 
				+    with open(args.o,"wb") as output:  # pickle requires a binary-mode file
			
 
				+         pickle.dump(root, output)  # serialize the parsed description to the -o path
			
--- a/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/CMSIS/NN/Include/arm_nnfunctions.h
@@ -1724,7 +1724,180 @@ void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out
 
				                         int8_t *output,
			
 
				                         const uint32_t total_size);
			
 
				 
			
 
				+  /**
			
 
				+   * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis
			
 
				+   *        This function should be called for each input tensor to concatenate. The argument offset_x
			
 
				+   *        will be used to store the input tensor in the correct position in the output tensor
			
 
				+   *
			
 
				+   *        i.e.    offset_x = 0
			
 
				+   *                for(i = 0; i < num_input_tensors; ++i)
			
 
				+   *                {
			
 
				+   *                    arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x)
			
 
				+   *                    offset_x += input_x[i]
			
 
				+   *                }
			
 
				+   *
			
 
				+   *        This function assumes that the output tensor has:
			
 
				+   *        -# The same height of the input tensor
			
 
				+   *        -# The same number of channels of the input tensor
			
 
				+   *        -# The same batch size of the input tensor
			
 
				+   *
			
 
				+   *        Unless specified otherwise, arguments are mandatory.
			
 
				+   *
			
 
				+   * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors because it does not involve any arithmetic operation
			
 
				+   *
			
 
				+   * @param[in]  input    Pointer to input tensor
			
 
				+   * @param[in]  input_x  Width of input tensor
			
 
				+   * @param[in]  input_y  Height of input tensor
			
 
				+   * @param[in]  input_z  Channels in input tensor
			
 
				+   * @param[in]  input_w  Batch size in input tensor
			
 
				+   * @param[out] output   Pointer to output tensor
			
 
				+   * @param[in]  output_x Width of output tensor
			
 
				+   * @param[in]  offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor
			
 
				+   *                      It is the user's responsibility to provide the correct value
			
 
				+   * @return     None
			
 
				+   *
			
 
				+   * <b> Input constraints</b>
			
 
				+   * offset_x is less than output_x
			
 
				+   *
			
 
				+   */
			
 
				+    void arm_concatenation_s8_x(const int8_t *input,
			
 
				+                                const uint16_t input_x,
			
 
				+                                const uint16_t input_y,
			
 
				+                                const uint16_t input_z,
			
 
				+                                const uint16_t input_w,
			
 
				+                                int8_t *output,
			
 
				+                                const uint16_t output_x,
			
 
				+                                const uint32_t offset_x);
			
 
				+
			
 
				+  /**
			
 
				+   * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis
			
 
				+   *        This function should be called for each input tensor to concatenate. The argument offset_y
			
 
				+   *        will be used to store the input tensor in the correct position in the output tensor
			
 
				+   *
			
 
				+   *        i.e.    offset_y = 0
			
 
				+   *                for(i = 0; i < num_input_tensors; ++i)
			
 
				+   *                {
			
 
				+   *                    arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y)
			
 
				+   *                    offset_y += input_y[i]
			
 
				+   *                }
			
 
				+   *
			
 
				+   *        This function assumes that the output tensor has:
			
 
				+   *        -# The same width of the input tensor
			
 
				+   *        -# The same number of channels of the input tensor
			
 
				+   *        -# The same batch size of the input tensor
			
 
				+   *
			
 
				+   *        Unless specified otherwise, arguments are mandatory.
			
 
				+   *
			
 
				+   * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors because it does not involve any arithmetic operation
			
 
				+   *
			
 
				+   * @param[in]  input    Pointer to input tensor
			
 
				+   * @param[in]  input_x  Width of input tensor
			
 
				+   * @param[in]  input_y  Height of input tensor
			
 
				+   * @param[in]  input_z  Channels in input tensor
			
 
				+   * @param[in]  input_w  Batch size in input tensor
			
 
				+   * @param[out] output   Pointer to output tensor
			
 
				+   * @param[in]  output_y Height of output tensor
			
 
				+   * @param[in]  offset_y The offset on the Y axis to start concatenating the input tensor
			
 
				+   *                      It is the user's responsibility to provide the correct value
			
 
				+   * @return     None
			
 
				+   *
			
 
				+   * <b> Input constraints</b>
			
 
				+   * offset_y is less than output_y
			
 
				+   *
			
 
				+   */
			
 
				+    void arm_concatenation_s8_y(const int8_t *input,
			
 
				+                                const uint16_t input_x,
			
 
				+                                const uint16_t input_y,
			
 
				+                                const uint16_t input_z,
			
 
				+                                const uint16_t input_w,
			
 
				+                                int8_t *output,
			
 
				+                                const uint16_t output_y,
			
 
				+                                const uint32_t offset_y);
			
 
				+
			
 
				+  /**
			
 
				+   * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis
			
 
				+   *        This function should be called for each input tensor to concatenate. The argument offset_z
			
 
				+   *        will be used to store the input tensor in the correct position in the output tensor
			
 
				+   *
			
 
				+   *        i.e.    offset_z = 0
			
 
				+   *                for(i = 0; i < num_input_tensors; ++i)
			
 
				+   *                {
			
 
				+   *                    arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z)
			
 
				+   *                    offset_z += input_z[i]
			
 
				+   *                }
			
 
				+   *
			
 
				+   *        This function assumes that the output tensor has:
			
 
				+   *        -# The same width of the input tensor
			
 
				+   *        -# The same height of the input tensor
			
 
				+   *        -# The same batch size of the input tensor
			
 
				+   *
			
 
				+   *        Unless specified otherwise, arguments are mandatory.
			
 
				+   *
			
 
				+   * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors because it does not involve any arithmetic operation
			
 
				+   *
			
 
				+   * @param[in]  input    Pointer to input tensor
			
 
				+   * @param[in]  input_x  Width of input tensor
			
 
				+   * @param[in]  input_y  Height of input tensor
			
 
				+   * @param[in]  input_z  Channels in input tensor
			
 
				+   * @param[in]  input_w  Batch size in input tensor
			
 
				+   * @param[out] output   Pointer to output tensor
			
 
				+   * @param[in]  output_z Channels in output tensor
			
 
				+   * @param[in]  offset_z The offset on the Z axis to start concatenating the input tensor
			
 
				+   *                      It is the user's responsibility to provide the correct value
			
 
				+   * @return     None
			
 
				+   *
			
 
				+   * <b> Input constraints</b>
			
 
				+   * offset_z is less than output_z
			
 
				+   *
			
 
				+   */
			
 
				+    void arm_concatenation_s8_z(const int8_t *input,
			
 
				+                                const uint16_t input_x,
			
 
				+                                const uint16_t input_y,
			
 
				+                                const uint16_t input_z,
			
 
				+                                const uint16_t input_w,
			
 
				+                                int8_t *output,
			
 
				+                                const uint16_t output_z,
			
 
				+                                const uint32_t offset_z);
			
 
				 
			
 
				+  /**
			
 
				+   * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size)
			
 
				+   *        This function should be called for each input tensor to concatenate. The argument offset_w
			
 
				+   *        will be used to store the input tensor in the correct position in the output tensor
			
 
				+   *
			
 
				+   *        i.e.    offset_w = 0
			
 
				+   *                for(i = 0; i < num_input_tensors; ++i)
			
 
				+   *                {
			
 
				+   *                    arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w)
			
 
				+   *                    offset_w += input_w[i]
			
 
				+   *                }
			
 
				+   *
			
 
				+   *        This function assumes that the output tensor has:
			
 
				+   *        -# The same width of the input tensor
			
 
				+   *        -# The same height of the input tensor
			
 
				+   *        -# The same number of channels of the input tensor
			
 
				+   *
			
 
				+   *        Unless specified otherwise, arguments are mandatory.
			
 
				+   *
			
 
				+   * @note This function is data-layout independent and can be used to concatenate either int8 or uint8 tensors because it does not involve any arithmetic operation
			
 
				+   *
			
 
				+   * @param[in]  input    Pointer to input tensor
			
 
				+   * @param[in]  input_x  Width of input tensor
			
 
				+   * @param[in]  input_y  Height of input tensor
			
 
				+   * @param[in]  input_z  Channels in input tensor
			
 
				+   * @param[in]  input_w  Batch size in input tensor
			
 
				+   * @param[out] output   Pointer to output tensor
			
 
				+   * @param[in]  offset_w The offset on the W axis to start concatenating the input tensor
			
 
				+   *                      It is the user's responsibility to provide the correct value
			
 
				+   * @return     None
			
 
				+   *
			
 
				+   */
			
 
				+    void arm_concatenation_s8_w(const int8_t *input,
			
 
				+                                const uint16_t input_x,
			
 
				+                                const uint16_t input_y,
			
 
				+                                const uint16_t input_z,
			
 
				+                                const uint16_t input_w,
			
 
				+                                int8_t *output,
			
 
				+                                const uint32_t offset_w);
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/CMSIS/NN/Source/CMakeLists.txt
+++ b/CMSIS/NN/Source/CMakeLists.txt
@@ -15,6 +15,7 @@ include(configDsp)
 
				 # There are some dependencies between the parts but they are not tracked
			
 
				 # by this cmake. So, enabling some functions may require to enable some
			
 
				 # other ones.
			
 
				+option(CONCATENATION        "Concatenation"         ON)
			
 
				 option(FULLYCONNECTED       "Fully Connected"       ON)
			
 
				 option(CONVOLUTION          "Convolutions"          ON)
			
 
				 option(ACTIVATION           "Activations"           ON)
			
@@ -48,6 +49,11 @@ if (BASICMATHSNN)
 
				   target_link_libraries(CMSISNN INTERFACE CMSISNNBasicMaths)
			
 
				 endif()
			
 
				 
			
 
				+if (CONCATENATION)
			
 
				+  add_subdirectory(ConcatenationFunctions)
			
 
				+  target_link_libraries(CMSISNN INTERFACE CMSISNNConcatenation)
			
 
				+endif()
			
 
				+
			
 
				 if (FULLYCONNECTED)
			
 
				   add_subdirectory(FullyConnectedFunctions)
			
 
				   target_link_libraries(CMSISNN INTERFACE CMSISNNFullyConnected)
			
--- a/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt
+++ b/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt
@@ -0,0 +1,15 @@
 
				+cmake_minimum_required (VERSION 3.6)
			
 
				+
			
 
				+project(CMSISNNConcatenation)
			
 
				+
			
 
				+include(configLib)
			
 
				+
			
 
				+file(GLOB SRC "./*_*.c")
			
 
				+
			
 
				+add_library(CMSISNNConcatenation STATIC ${SRC})
			
 
				+
			
 
				+configLib(CMSISNNConcatenation ${ROOT})
			
 
				+configDsp(CMSISNNConcatenation ${ROOT})
			
 
				+
			
 
				+### Includes
			
 
				+target_include_directories(CMSISNNConcatenation PUBLIC "${NN}/Include")
			
--- a/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
+++ b/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c
@@ -0,0 +1,65 @@
 
				+/*
			
 
				+ * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_concatenation_s8_w.c
			
 
				+ * Description:  s8 version of concatenation along the W axis
			
 
				+ *
			
 
				+ * $Date:        October 2019
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M cores
			
 
				+ *
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ *  @ingroup groupNN
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup Concatenation
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				/*
				 * s8 version of concatenation along the W axis (batch size).
				 *
				 * Copies the whole input tensor (input_x * input_y * input_z * input_w
				 * bytes) into the output tensor in a single memcpy. The destination
				 * starts offset_w batches into the output, where one batch is
				 * input_x * input_y * input_z elements.
				 *
				 * input     Pointer to input tensor
				 * input_x   Width of input tensor
				 * input_y   Height of input tensor
				 * input_z   Channels in input tensor
				 * input_w   Batch size in input tensor
				 * output    Pointer to output tensor
				 * offset_w  Offset on the W axis at which the input tensor is stored
				 *
				 * Refer to header file for details.
				 */
				void arm_concatenation_s8_w(const int8_t *input,
				                            const uint16_t input_x,
				                            const uint16_t input_y,
				                            const uint16_t input_z,
				                            const uint16_t input_w,
				                            int8_t *output,
				                            const uint32_t offset_w)
				{
				    /* Widen before multiplying: uint16_t operands are promoted to signed
				     * int, so the plain product could overflow, which is undefined
				     * behaviour. Unsigned 32-bit arithmetic is always well defined. */
				    const uint32_t batch_size      = (uint32_t)input_x * input_y * input_z;
				    const uint32_t input_copy_size = batch_size * input_w;

				    /* Skip the batches already written by previous calls. */
				    output += offset_w * batch_size;

				    memcpy(output, input, input_copy_size);
				}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of Concatenation group
			
 
				+ */
			
--- a/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
+++ b/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c
@@ -0,0 +1,74 @@
 
				+/*
			
 
				+ * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_concatenation_s8_x.c
			
 
				+ * Description:  s8 version of concatenation along the X axis
			
 
				+ *
			
 
				+ * $Date:        October 2019
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M cores
			
 
				+ *
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ *  @ingroup groupNN
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup Concatenation
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				/*
				 * s8 version of concatenation along the X axis (width).
				 *
				 * The input is treated as input_y * input_z * input_w rows of input_x
				 * bytes. Row r is copied to output + offset_x + r * output_x, so rows
				 * that are contiguous in the input land output_x bytes apart in the
				 * output.
				 *
				 * input     Pointer to input tensor
				 * input_x   Width of input tensor
				 * input_y   Height of input tensor
				 * input_z   Channels in input tensor
				 * input_w   Batch size in input tensor
				 * output    Pointer to output tensor
				 * output_x  Width of output tensor
				 * offset_x  Offset (in elements) on the X axis at which each row is stored
				 *
				 * Refer to header file for details.
				 */
				void arm_concatenation_s8_x(const int8_t *input,
				                            const uint16_t input_x,
				                            const uint16_t input_y,
				                            const uint16_t input_z,
				                            const uint16_t input_w,
				                            int8_t *output,
				                            const uint16_t output_x,
				                            const uint32_t offset_x)
				{
				    /* Widen before multiplying: uint16_t operands are promoted to signed
				     * int, so the plain product could overflow (undefined behaviour). */
				    const uint32_t num_rows = (uint32_t)input_y * input_z * input_w;
				    uint32_t row;

				    /* Copy per row. Addresses are derived from the row index instead of
				     * advancing running pointers, so no pointer beyond the last copied
				     * row is ever formed. */
				    for (row = 0; row < num_rows; ++row)
				    {
				        memcpy(output + offset_x + (size_t)row * output_x,
				               input + (size_t)row * input_x,
				               input_x);
				    }
				}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of Concatenation group
			
 
				+ */
			
--- a/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
+++ b/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c
@@ -0,0 +1,75 @@
 
				+/*
			
 
				+ * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_concatenation_s8_y.c
			
 
				+ * Description:  s8 version of concatenation along the Y axis
			
 
				+ *
			
 
				+ * $Date:        October 2019
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M cores
			
 
				+ *
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ *  @ingroup groupNN
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup Concatenation
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				/*
				 * s8 version of concatenation along the Y axis (height).
				 *
				 * The input is treated as input_z * input_w tiles of
				 * input_x * input_y bytes. Tile t is copied to
				 * output + offset_y * input_x + t * (input_x * output_y).
				 *
				 * input     Pointer to input tensor
				 * input_x   Width of input tensor
				 * input_y   Height of input tensor
				 * input_z   Channels in input tensor
				 * input_w   Batch size in input tensor
				 * output    Pointer to output tensor
				 * output_y  Height of output tensor
				 * offset_y  Offset on the Y axis at which each tile is stored
				 *
				 * Refer to header file for details.
				 */
				void arm_concatenation_s8_y(const int8_t *input,
				                            const uint16_t input_x,
				                            const uint16_t input_y,
				                            const uint16_t input_z,
				                            const uint16_t input_w,
				                            int8_t *output,
				                            const uint16_t output_y,
				                            const uint32_t offset_y)
				{
				    /* Widen before multiplying: uint16_t operands are promoted to signed
				     * int, so e.g. 65535 * 65535 would overflow (undefined behaviour). */
				    const uint32_t num_tiles       = (uint32_t)input_z * input_w;
				    const uint32_t input_copy_size = (uint32_t)input_x * input_y;
				    const uint32_t output_stride   = (uint32_t)input_x * output_y;
				    const uint32_t output_start    = offset_y * input_x;
				    uint32_t tile;

				    /* Copy per tile. Addresses are derived from the tile index instead of
				     * advancing running pointers, so no pointer beyond the last copied
				     * tile is ever formed. */
				    for (tile = 0; tile < num_tiles; ++tile)
				    {
				        memcpy(output + output_start + (size_t)tile * output_stride,
				               input + (size_t)tile * input_copy_size,
				               input_copy_size);
				    }
				}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of Concatenation group
			
 
				+ */
			
--- a/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
+++ b/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c
@@ -0,0 +1,74 @@
 
				+/*
			
 
				+ * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
			
 
				+ *
			
 
				+ * SPDX-License-Identifier: Apache-2.0
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the License); you may
			
 
				+ * not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ * www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
			
 
				+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+/* ----------------------------------------------------------------------
			
 
				+ * Project:      CMSIS NN Library
			
 
				+ * Title:        arm_concatenation_s8_z.c
			
 
				+ * Description:  s8 version of concatenation along the Z axis
			
 
				+ *
			
 
				+ * $Date:        October 2019
			
 
				+ * $Revision:    V.1.0.0
			
 
				+ *
			
 
				+ * Target Processor:  Cortex-M cores
			
 
				+ *
			
 
				+ * -------------------------------------------------------------------- */
			
 
				+
			
 
				+#include "arm_nnfunctions.h"
			
 
				+
			
 
				+/**
			
 
				+ *  @ingroup groupNN
			
 
				+ */
			
 
				+
			
 
				+/**
			
 
				+ * @addtogroup Concatenation
			
 
				+ * @{
			
 
				+ */
			
 
				+
			
 
				/*
				 * s8 version of concatenation along the Z axis (channels).
				 *
				 * The input is treated as input_w batches of
				 * input_x * input_y * input_z bytes. Batch b is copied to
				 * output + offset_z * (input_x * input_y)
				 *        + b * (input_x * input_y * output_z).
				 *
				 * input     Pointer to input tensor
				 * input_x   Width of input tensor
				 * input_y   Height of input tensor
				 * input_z   Channels in input tensor
				 * input_w   Batch size in input tensor
				 * output    Pointer to output tensor
				 * output_z  Channels in output tensor
				 * offset_z  Offset on the Z axis at which each batch is stored
				 *
				 * Refer to header file for details.
				 */
				void arm_concatenation_s8_z(const int8_t *input,
				                            const uint16_t input_x,
				                            const uint16_t input_y,
				                            const uint16_t input_z,
				                            const uint16_t input_w,
				                            int8_t *output,
				                            const uint16_t output_z,
				                            const uint32_t offset_z)
				{
				    /* Widen before multiplying: uint16_t operands are promoted to signed
				     * int, so the plain products could overflow (undefined behaviour). */
				    const uint32_t plane_size      = (uint32_t)input_x * input_y;
				    const uint32_t input_copy_size = plane_size * input_z;
				    const uint32_t output_stride   = plane_size * output_z;
				    const uint32_t output_start    = offset_z * plane_size;
				    uint32_t batch;

				    /* Copy per batch. Addresses are derived from the batch index instead
				     * of advancing running pointers, so no pointer beyond the last copied
				     * batch is ever formed. */
				    for (batch = 0; batch < input_w; ++batch)
				    {
				        memcpy(output + output_start + (size_t)batch * output_stride,
				               input + (size_t)batch * input_copy_size,
				               input_copy_size);
				    }
				}
			
 
				+
			
 
				+/**
			
 
				+ * @} end of Concatenation group
			
 
				+ */