Browse source code

CMSIS-NN: Add softmax s16 support (#1425)

CMSIS-NN: Add softmax s16 support

Adds scalar version of softmax with int8 input and int16 output.
Adds scalar version of softmax with int16 input and int16 output.
Adds unit tests.
Måns Nilsson 4 years ago
parent
commit
ced3c17d2c
36 files changed with 1084 additions and 155 deletions
  1. 5 3
      Include/arm_nn_math_types.h
  2. 10 3
      Include/arm_nn_types.h
  3. 53 3
      Include/arm_nnfunctions.h
  4. 32 5
      Include/arm_nnsupportfunctions.h
  5. 2 0
      README.md
  6. 6 6
      Source/NNSupportFunctions/CMakeLists.txt
  7. 4 2
      Source/SoftmaxFunctions/CMakeLists.txt
  8. 143 0
      Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c
  9. 122 0
      Source/SoftmaxFunctions/arm_softmax_s16.c
  10. 7 53
      Source/SoftmaxFunctions/arm_softmax_s8.c
  11. 55 0
      Source/SoftmaxFunctions/arm_softmax_s8_s16.c
  12. 2 0
      Tests/UnitTest/CMakeLists.txt
  13. 1 1
      Tests/UnitTest/PregeneratedData/softmax/input.txt
  14. 2 0
      Tests/UnitTest/PregeneratedData/softmax_s16/input.txt
  15. 2 0
      Tests/UnitTest/PregeneratedData/softmax_s8_s16/input.txt
  16. 55 0
      Tests/UnitTest/TestCases/Common/Softmax/exp_lut_data.h
  17. 55 0
      Tests/UnitTest/TestCases/Common/Softmax/one_by_one_lut_data.h
  18. 90 0
      Tests/UnitTest/TestCases/Common/Softmax/softmax_int8_to_int16_template.json
  19. 1 1
      Tests/UnitTest/TestCases/TestData/softmax/input_data.h
  20. 1 1
      Tests/UnitTest/TestCases/TestData/softmax/output_ref_data.h
  21. 7 0
      Tests/UnitTest/TestCases/TestData/softmax_s16/config_data.h
  22. 5 0
      Tests/UnitTest/TestCases/TestData/softmax_s16/input_data.h
  23. 5 0
      Tests/UnitTest/TestCases/TestData/softmax_s16/output_ref_data.h
  24. 4 0
      Tests/UnitTest/TestCases/TestData/softmax_s16/test_data.h
  25. 8 0
      Tests/UnitTest/TestCases/TestData/softmax_s8_s16/config_data.h
  26. 5 0
      Tests/UnitTest/TestCases/TestData/softmax_s8_s16/input_data.h
  27. 6 0
      Tests/UnitTest/TestCases/TestData/softmax_s8_s16/output_ref_data.h
  28. 4 0
      Tests/UnitTest/TestCases/TestData/softmax_s8_s16/test_data.h
  29. 23 0
      Tests/UnitTest/TestCases/test_arm_softmax_s16/CMakeLists.txt
  30. 47 0
      Tests/UnitTest/TestCases/test_arm_softmax_s16/Unity/unity_test_arm_softmax_s16.c
  31. 45 0
      Tests/UnitTest/TestCases/test_arm_softmax_s16/test_arm_softmax_s16.c
  32. 8 2
      Tests/UnitTest/TestCases/test_arm_softmax_s8/test_arm_softmax_s8.c
  33. 23 0
      Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/CMakeLists.txt
  34. 49 0
      Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/Unity/unity_test_arm_softmax_s8_s16.c
  35. 66 0
      Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/test_arm_softmax_s8_s16.c
  36. 131 75
      Tests/UnitTest/generate_test_data.py

+ 5 - 3
Include/arm_nn_math_types.h

@@ -1,12 +1,12 @@
 /******************************************************************************
  * @file     arm_nn_math_types.h
  * @brief    Compiler include and basic types
- * @version  V1.0.0
- * @date     08 July 2021
+ * @version  V1.1.0
+ * @date     09 March 2022
  * Target Processor: Cortex-M
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -145,8 +145,10 @@ extern "C" {
  */
 
 #define NN_Q31_MAX ((q31_t)(0x7FFFFFFFL))
+#define NN_Q15_MAX ((q15_t)(0x7FFF))
 #define NN_Q7_MAX ((q7_t)(0x7F))
 #define NN_Q31_MIN ((q31_t)(0x80000000L))
+#define NN_Q15_MIN ((q15_t)(0x8000))
 #define NN_Q7_MIN ((q7_t)(0x80))
 
 /**

+ 10 - 3
Include/arm_nn_types.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -22,8 +22,8 @@
  * Description:  Public header file to contain the CMSIS-NN structs for the
  *               TensorFlowLite micro compliant functions
  *
- * $Date:        19. March 2021
- * $Revision:    V.2.0.0
+ * $Date:        22. February 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  * -------------------------------------------------------------------- */
@@ -127,4 +127,11 @@ typedef struct
     cmsis_nn_activation output_activation;
 } cmsis_nn_svdf_params;
 
+/** CMSIS-NN object for Softmax s16 layer parameters */
+typedef struct
+{
+    const int16_t *exp_lut;
+    const int16_t *one_by_one_lut;
+} cmsis_nn_softmax_lut_s16;
+
 #endif // _ARM_NN_TYPES_H

+ 53 - 3
Include/arm_nnfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnfunctions.h
  * Description:  Public header file for CMSIS NN Library
  *
- * $Date:        14 February 2022
- * $Revision:    V.8.0.1
+ * $Date:        22 February 2022
+ * $Revision:    V.8.1.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -2070,7 +2070,6 @@ void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out);
  * @note Supported framework: TensorFlow Lite micro (bit-accurate)
  *
  */
-
 void arm_softmax_s8(const int8_t *input,
                     const int32_t num_rows,
                     const int32_t row_size,
@@ -2079,6 +2078,57 @@ void arm_softmax_s8(const int8_t *input,
                     const int32_t diff_min,
                     int8_t *output);
 
+/**
+ * @brief S8 to s16 softmax function
+ * @param[in]  input     Pointer to the input tensor
+ * @param[in]  num_rows  Number of rows in the input tensor
+ * @param[in]  row_size  Number of elements in each input row
+ * @param[in]  mult      Input quantization multiplier
+ * @param[in]  shift     Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min  Minimum difference with max in row. Used to check if
+ *                       the quantized exponential operation can be performed
+ * @param[out] output    Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+void arm_softmax_s8_s16(const int8_t *input,
+                        const int32_t num_rows,
+                        const int32_t row_size,
+                        const int32_t mult,
+                        const int32_t shift,
+                        const int32_t diff_min,
+                        int16_t *output);
+
+/**
+ * @brief S16 softmax function
+ * @param[in]  input           Pointer to the input tensor
+ * @param[in]  num_rows        Number of rows in the input tensor
+ * @param[in]  row_size        Number of elements in each input row
+ * @param[in]  mult            Input quantization multiplier
+ * @param[in]  shift           Input quantization shift within the range [0, 31]
+ * @param[in]  softmax_params  Softmax s16 layer parameters with two pointers to LUTs specified below.
+ *                             For indexing the high 9 bits are used and 7 remaining for interpolation.
+ *                             That means 512 entries for the 9-bit indexing and 1 extra for interpolation, i.e. 513
+ *                             values for each LUT.
+ *                             - Lookup table for exp(x), where x uniform distributed between [-10.0 , 0.0]
+ *                             - Lookup table for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0]
+ * @param[out] output          Pointer to the output tensor
+ * @return                        The function returns
+ *                                    <code>ARM_MATH_ARGUMENT_ERROR</code> if LUTs are NULL
+ *                                    <code>ARM_MATH_SUCCESS</code> - Successful operation
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+arm_status arm_softmax_s16(const int16_t *input,
+                           const int32_t num_rows,
+                           const int32_t row_size,
+                           const int32_t mult,
+                           const int32_t shift,
+                           const cmsis_nn_softmax_lut_s16 *softmax_params,
+                           int16_t *output);
+
 /**
  * @brief U8 softmax function
  * @param[in]  input     Pointer to the input tensor

+ 32 - 5
Include/arm_nnsupportfunctions.h

@@ -21,8 +21,9 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        7. February 2022
- * $Revision:    V.6.1.0
+
+ * $Date:        24. February 2022
+ * $Revision:    V.6.2.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -33,6 +34,8 @@
 #include "arm_nn_math_types.h"
 #include "arm_nn_types.h"
 
+#include <stdbool.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -776,6 +779,30 @@ q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
                                     const int32_t *const output_bias,
                                     q7_t *out_0);
 
+/**
+ * @brief Common softmax function for s8 input and s8 or s16 output
+ * @param[in]  input          Pointer to the input tensor
+ * @param[in]  num_rows       Number of rows in the input tensor
+ * @param[in]  row_size       Number of elements in each input row
+ * @param[in]  mult           Input quantization multiplier
+ * @param[in]  shift          Input quantization shift within the range [0, 31]
+ * @param[in]  diff_min       Minimum difference with max in row. Used to check if
+ *                            the quantized exponential operation can be performed
+ * @param[in]  int16_output   Indicating s8 output if 0 else s16 output
+ * @param[out] output         Pointer to the output tensor
+ *
+ * @note Supported framework: TensorFlow Lite micro (bit-accurate)
+ *
+ */
+void arm_nn_softmax_common_s8(const int8_t *input,
+                              const int32_t num_rows,
+                              const int32_t row_size,
+                              const int32_t mult,
+                              const int32_t shift,
+                              const int32_t diff_min,
+                              const bool int16_output,
+                              void *output);
+
 /**
  * @brief macro for adding rounding offset
  */
@@ -919,10 +946,10 @@ __STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multip
 
 /**
  * @brief           Requantize a given 64 bit value.
- * @param[in]       val                 Value to be requantized
- * @param[in]       reduced_multiplier  Reduced multiplier from range {NN_Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1,
+ * @param[in]       val                 Value to be requantized in the range {-(1<<47)} to {(1<<47) - 1}
+ * @param[in]       reduced_multiplier  Reduced multiplier in the range {NN_Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1,
  * Q16_MAX}
- * @param[in]       shift               left or right shift for 'val * multiplier'
+ * @param[in]       shift               Left or right shift for 'val * multiplier' in the range {-31} to {7}
  *
  * @return          Returns (val * multiplier)/(2 ^ shift)
  *

+ 2 - 0
README.md

@@ -46,6 +46,8 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 |[Softmax](https://arm-software.github.io/CMSIS_5/NN/html/group__Softmax.html)||||| |  ||
 ||arm_softmax_q7()| SOFTMAX | None | None | Yes | No | Not bit exact to TFLu but can be up to 70x faster |
 ||arm_softmax_s8()| SOFTMAX | None | None | No | Yes | Bit exact to TFLu |
+||arm_softmax_s8_s16()| SOFTMAX | None | None | No | No | Bit exact to TFLu |
+||arm_softmax_s16()| SOFTMAX | None | None | No | No | Bit exact to TFLu |
 ||arm_softmax_u8()| SOFTMAX | None | None | No | No | Bit exact to TFLu |
 |[SVDF](https://arm-software.github.io/CMSIS_5/NN/html/group__SVDF.html)||||| |  ||
 ||arm_svdf_s8()| SVDF | None | None | Yes | Yes | Bit exact to TFLu |

+ 6 - 6
Source/NNSupportFunctions/CMakeLists.txt

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2021 Arm Limited.
+# Copyright (c) 2019-2022 Arm Limited.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -18,9 +18,9 @@
 
 file(GLOB SRC "./*_s8.c")
 target_sources(cmsis-nn PRIVATE ${SRC} arm_q7_to_q15_with_offset.c
-                                      arm_nn_mat_mul_kernel_s16.c
-                                      arm_q7_to_q15_with_offset.c
-                                      arm_nn_mat_mul_kernel_s16.c
-                                      arm_nn_vec_mat_mult_t_s16.c
-                                      arm_q7_to_q15_no_shift.c)
+                                       arm_nn_mat_mul_kernel_s16.c
+                                       arm_q7_to_q15_with_offset.c
+                                       arm_nn_mat_mul_kernel_s16.c
+                                       arm_nn_vec_mat_mult_t_s16.c
+                                       arm_q7_to_q15_no_shift.c)
 

+ 4 - 2
Source/SoftmaxFunctions/CMakeLists.txt

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2021 Arm Limited.
+# Copyright (c) 2019-2022 Arm Limited.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -17,4 +17,6 @@
 #
 
 file(GLOB SRC "./*_s8.c")
-target_sources(cmsis-nn PRIVATE ${SRC})
+target_sources(cmsis-nn PRIVATE ${SRC} arm_softmax_s8_s16.c
+                                       arm_softmax_s16.c
+                                       arm_nn_softmax_common_s8.c)

+ 143 - 0
Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c

@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_softmax_common_s8.c
+ * Description:  Softmax with s8 input and output of s8 or s16.
+ *
+ * $Date:        9 March 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M processors
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+#define ACCUM_BITS 12
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/*
+ * Softmax function with s8 input and output of s8 or s16.
+ *
+ * Refer header file for details.
+ *
+ */
+void arm_nn_softmax_common_s8(const int8_t *input,
+                              const int32_t num_rows,
+                              const int32_t row_size,
+                              const int32_t mult,
+                              const int32_t shift,
+                              const int32_t diff_min,
+                              const bool int16_output,
+                              void *output)
+{
+    const int32_t mask = (1 << shift);
+
+    int32_t col = 0;
+    int32_t row_idx;
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int8_t max = *input;
+
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            if (diff >= diff_min)
+            {
+                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
+            }
+        }
+
+        const int32_t headroom = __CLZ(sum);
+        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31));
+        int32_t bits_over_unit;
+
+        if (int16_output)
+        {
+            int16_t *output_s16 = (int16_t *)output;
+
+            bits_over_unit = ACCUM_BITS - headroom + 15;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q15_MIN;
+                    output_s16[col] = (int16_t)CLAMP(res, (int32_t)NN_Q15_MAX, (int32_t)NN_Q15_MIN);
+                }
+                else
+                {
+                    output_s16[col] = NN_Q15_MIN;
+                }
+            }
+            output_s16 += row_size;
+        }
+        else
+        {
+            int8_t *output_s8 = (int8_t *)output;
+
+            bits_over_unit = ACCUM_BITS - headroom + 23;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q7_MIN;
+                    output_s8[col] = (int8_t)CLAMP(res, (int32_t)NN_Q7_MAX, (int32_t)NN_Q7_MIN);
+                }
+                else
+                {
+                    output_s8[col] = NN_Q7_MIN;
+                }
+            }
+            output_s8 += row_size;
+        }
+
+        input += row_size;
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */

+ 122 - 0
Source/SoftmaxFunctions/arm_softmax_s16.c

@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s16.c
+ * Description:  S16 softmax function
+ *
+ * $Date:        9 March 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+arm_status arm_softmax_s16(const int16_t *input,
+                           const int32_t num_rows,
+                           const int32_t row_size,
+                           const int32_t mult,
+                           const int32_t shift,
+                           const cmsis_nn_softmax_lut_s16 *softmax_params,
+                           int16_t *output)
+{
+    int32_t col = 0;
+    int32_t row_idx;
+
+    if (softmax_params->exp_lut == NULL || softmax_params->one_by_one_lut == NULL)
+    {
+        return ARM_MATH_ARGUMENT_ERROR;
+    }
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int16_t max = *input;
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+        int16_t *cached_exp_results = output;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            const int32_t scaled_diff = arm_nn_requantize(diff, mult, shift);
+            const int32_t symmetric_scaled_diff = scaled_diff + NN_Q15_MAX;
+            const int16_t saturated_symmetric_scaled_diff = MIN(MAX(symmetric_scaled_diff, NN_Q15_MIN), NN_Q15_MAX);
+
+            // Lookup from exp table and cache result for next step
+            const int16_t index = (256 + (saturated_symmetric_scaled_diff >> 7));
+            const int16_t offset = saturated_symmetric_scaled_diff & 0x7f;
+            const int16_t base = softmax_params->exp_lut[index];
+            const int16_t slope = softmax_params->exp_lut[index + 1] - softmax_params->exp_lut[index];
+            const int16_t delta = (slope * offset + 64) >> 7;
+            const int16_t result = (base + delta);
+            cached_exp_results[col] = result;
+
+            sum += cached_exp_results[col];
+        }
+
+        const int32_t headroom = __CLZ(sum);
+
+        // Compute the reciprocal 1/sum
+        const int32_t shifted_sum = (((sum) << (headroom - 1)) + (1 << 13)) >> 14;
+
+        // Since LUT computes 1/(1 + x), compute x = (sum - 1) => -65536
+        // Since LUT expects a symmetrical input, recenter from [UINT16_MIN, UINT16_MAX] to [INT16_MIN, INT16_MAX] =>
+        // -32768 ==> So in total -65536 -32768 => -98304
+        const int16_t symmetric_shifted_sum = shifted_sum - 98304;
+
+        // Lookup from one by one table
+        const int16_t index = (256 + (symmetric_shifted_sum >> 7));
+        const int16_t offset = symmetric_shifted_sum & 0x7f;
+        const int16_t base = softmax_params->one_by_one_lut[index];
+        const int16_t slope = softmax_params->one_by_one_lut[index + 1] - softmax_params->one_by_one_lut[index];
+        const int16_t delta = (slope * offset + 64) >> 7;
+        const int16_t one_by_one_result = (base + delta);
+
+        for (col = 0; col < row_size; ++col)
+        {
+            const int16_t right_shift = 30 - headroom;
+            int32_t result = (cached_exp_results[col] * one_by_one_result) >> right_shift;
+            result = (result + 1) >> 1; // Last shift position and insert round
+            output[col] = (int16_t)result;
+        }
+
+        output += row_size;
+        input += row_size;
+    }
+
+    return ARM_MATH_SUCCESS;
+}
+
+/**
+ * @} end of Softmax group
+ */

+ 7 - 53
Source/SoftmaxFunctions/arm_softmax_s8.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -21,8 +21,8 @@
  * Title:        arm_softmax_s8.c
  * Description:  S8 softmax function
  *
- * $Date:        17. August 2021
- * $Revision:    V.2.0.3
+ * $Date:        9 March 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -192,7 +192,8 @@ void arm_softmax_s8(const int8_t *input,
             if (diff >= diff_min)
             {
                 const int32_t res =
-                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128;
+                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                    NN_Q7_MIN;
                 output[tail_idx + i] = (int8_t)CLAMP(res, (int32_t)ACT_MAX, (int32_t)ACT_MIN);
             }
             else
@@ -205,57 +206,10 @@ void arm_softmax_s8(const int8_t *input,
         output += row_size;
     }
 #else
-    const int32_t mask = (1 << shift);
-
-    int32_t col = 0;
-    int32_t row_idx;
-
-    for (row_idx = 0; row_idx < num_rows; ++row_idx)
-    {
-        // Find the maximum value in order to ensure numerical stability
-        int8_t max = *input;
-
-        for (col = 1; col < row_size; ++col)
-        {
-            max = MAX(max, input[col]);
-        }
-
-        int32_t diff = 0;
-        int32_t sum = 0;
-
-        for (col = 0; col < row_size; ++col)
-        {
-            diff = input[col] - max;
-            if (diff >= diff_min)
-            {
-                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
-            }
-        }
-
-        const int32_t headroom = __CLZ(sum);
-        const int32_t bits_over_unit = ACCUM_BITS - headroom + 23;
-        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31));
-
-        for (col = 0; col < row_size; ++col)
-        {
-            diff = input[col] - max;
-            if (diff >= diff_min)
-            {
-                const int32_t res =
-                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128;
-                output[col] = (int8_t)CLAMP(res, (int32_t)127, (int32_t)-128);
-            }
-            else
-            {
-                output[col] = -128;
-            }
-        }
-        input += row_size;
-        output += row_size;
-    }
-
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, false, (void *)output);
 #endif
 }
+
 /**
  * @} end of Softmax group
  */

+ 55 - 0
Source/SoftmaxFunctions/arm_softmax_s8_s16.c

@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s8_s16.c
+ * Description:  S8 to s16 softmax function
+ *
+ * $Date:        7 January 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+void arm_softmax_s8_s16(const int8_t *input,
+                        const int32_t num_rows,
+                        const int32_t row_size,
+                        const int32_t mult,
+                        const int32_t shift,
+                        const int32_t diff_min,
+                        int16_t *output)
+{
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, true, (void *)output);
+}
+/**
+ * @} end of Softmax group
+ */

+ 2 - 0
Tests/UnitTest/CMakeLists.txt

@@ -85,6 +85,8 @@ add_subdirectory(TestCases/test_arm_fully_connected_s8)
 add_subdirectory(TestCases/test_arm_max_pool_s16)
 add_subdirectory(TestCases/test_arm_max_pool_s8)
 add_subdirectory(TestCases/test_arm_softmax_s8)
+add_subdirectory(TestCases/test_arm_softmax_s8_s16)
+add_subdirectory(TestCases/test_arm_softmax_s16)
 add_subdirectory(TestCases/test_arm_svdf_s8)
 
 set(MAKE_CMD "python3")

+ 1 - 1
Tests/UnitTest/PregeneratedData/softmax/input.txt

@@ -1,2 +1,2 @@
 # 1,5
-4.300000000000000000e+01,4.900000000000000000e+01,7.100000000000000000e+01,1.000000000000000000e+00,6.400000000000000000e+01
+-9.900000000000000000e+01,-1.220000000000000000e+02,-6.700000000000000000e+01,1.000000000000000000e+01,-8.200000000000000000e+01

+ 2 - 0
Tests/UnitTest/PregeneratedData/softmax_s16/input.txt

@@ -0,0 +1,2 @@
+# 1,10
+1.847000000000000000e+03,2.831400000000000000e+04,-1.539900000000000000e+04,-2.144500000000000000e+04,2.364600000000000000e+04,-3.276600000000000000e+04,2.952100000000000000e+04,-5.296000000000000000e+03,-1.753000000000000000e+03,-1.600400000000000000e+04

+ 2 - 0
Tests/UnitTest/PregeneratedData/softmax_s8_s16/input.txt

@@ -0,0 +1,2 @@
+# 1,12
+-5.000000000000000000e+01,2.200000000000000000e+01,5.900000000000000000e+01,-6.900000000000000000e+01,1.500000000000000000e+01,3.100000000000000000e+01,-7.300000000000000000e+01,7.400000000000000000e+01,-5.700000000000000000e+01,-7.900000000000000000e+01,-1.070000000000000000e+02,1.900000000000000000e+01

+ 55 - 0
Tests/UnitTest/TestCases/Common/Softmax/exp_lut_data.h

@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// Lookup table for exp(x), where x uniform distributed between [-10.0 , 0.0].
+const q15_t softmax_s16_exp_lut[513] = {
+    2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
+    2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,     3,     3,     3,     3,     3,
+    3,     3,     3,     3,     3,     3,     3,     3,     3,     3,     3,     3,     4,     4,     4,     4,
+    4,     4,     4,     4,     4,     4,     4,     4,     4,     5,     5,     5,     5,     5,     5,     5,
+    5,     5,     5,     6,     6,     6,     6,     6,     6,     6,     6,     7,     7,     7,     7,     7,
+    7,     7,     7,     8,     8,     8,     8,     8,     8,     9,     9,     9,     9,     9,     9,     10,
+    10,    10,    10,    10,    11,    11,    11,    11,    11,    12,    12,    12,    12,    13,    13,    13,
+    13,    14,    14,    14,    14,    15,    15,    15,    16,    16,    16,    17,    17,    17,    18,    18,
+    18,    19,    19,    19,    20,    20,    21,    21,    21,    22,    22,    23,    23,    24,    24,    25,
+    25,    26,    26,    27,    27,    28,    28,    29,    29,    30,    30,    31,    32,    32,    33,    34,
+    34,    35,    36,    36,    37,    37,    38,    39,    40,    40,    42,    42,    43,    44,    45,    45,
+    46,    47,    48,    49,    50,    51,    52,    53,    54,    55,    56,    57,    59,    60,    60,    62,
+    63,    65,    65,    67,    68,    69,    71,    73,    74,    75,    77,    78,    80,    81,    83,    85,
+    86,    88,    90,    92,    93,    95,    97,    99,    101,   103,   105,   107,   109,   112,   114,   116,
+    118,   121,   123,   126,   128,   131,   133,   135,   139,   141,   144,   147,   149,   152,   155,   158,
+    162,   165,   168,   171,   174,   178,   181,   185,   189,   192,   196,   200,   204,   208,   212,   217,
+    221,   225,   230,   234,   239,   243,   248,   253,   258,   263,   268,   273,   279,   284,   290,   296,
+    302,   308,   314,   320,   327,   333,   340,   346,   353,   360,   366,   374,   381,   389,   397,   404,
+    413,   421,   429,   437,   446,   455,   464,   473,   482,   492,   501,   511,   522,   532,   543,   553,
+    564,   575,   586,   598,   610,   622,   634,   646,   659,   672,   685,   699,   713,   727,   741,   756,
+    771,   786,   801,   817,   833,   850,   866,   884,   901,   919,   937,   955,   974,   993,   1013,  1033,
+    1053,  1074,  1095,  1117,  1139,  1161,  1184,  1207,  1232,  1256,  1281,  1306,  1332,  1358,  1385,  1412,
+    1440,  1468,  1497,  1527,  1557,  1587,  1619,  1651,  1683,  1716,  1750,  1785,  1820,  1856,  1892,  1930,
+    1968,  2006,  2046,  2087,  2128,  2170,  2212,  2256,  2300,  2346,  2392,  2439,  2488,  2537,  2587,  2638,
+    2690,  2743,  2796,  2852,  2908,  2966,  3024,  3084,  3145,  3207,  3270,  3334,  3400,  3467,  3535,  3605,
+    3677,  3749,  3822,  3898,  3975,  4053,  4133,  4214,  4297,  4383,  4469,  4557,  4647,  4739,  4833,  4927,
+    5024,  5124,  5225,  5328,  5433,  5541,  5649,  5761,  5875,  5991,  6109,  6230,  6352,  6477,  6605,  6736,
+    6868,  7004,  7141,  7282,  7427,  7572,  7722,  7874,  8030,  8188,  8350,  8514,  8683,  8854,  9028,  9206,
+    9387,  9572,  9762,  9954,  10151, 10351, 10555, 10763, 10976, 11191, 11412, 11637, 11867, 12102, 12341, 12583,
+    12831, 13085, 13342, 13606, 13874, 14148, 14427, 14711, 15002, 15297, 15599, 15907, 16221, 16541, 16867, 17199,
+    17539, 17884, 18237, 18597, 18964, 19338, 19719, 20108, 20505, 20909, 21322, 21742, 22171, 22608, 23054, 23509,
+    23973, 24445, 24928, 25419, 25921, 26432, 26953, 27485, 28027, 28580, 29143, 29718, 30304, 30902, 31512, 32133,
+    32767};

+ 55 - 0
Tests/UnitTest/TestCases/Common/Softmax/one_by_one_lut_data.h

@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// Lookup table for 1 / (1 + x), where x is uniformly distributed in [0.0, 1.0].
+const q15_t softmax_s16_one_by_one_lut[513] = {
+    32767, 32704, 32640, 32578, 32514, 32451, 32388, 32326, 32264, 32202, 32141, 32079, 32018, 31957, 31896, 31835,
+    31775, 31715, 31655, 31596, 31537, 31476, 31418, 31359, 31301, 31242, 31184, 31127, 31069, 31011, 30954, 30897,
+    30840, 30784, 30727, 30671, 30615, 30560, 30504, 30449, 30394, 30339, 30283, 30229, 30175, 30121, 30067, 30013,
+    29960, 29906, 29853, 29800, 29746, 29694, 29642, 29589, 29537, 29486, 29434, 29382, 29331, 29280, 29229, 29177,
+    29127, 29076, 29026, 28976, 28926, 28877, 28827, 28777, 28728, 28679, 28630, 28581, 28532, 28484, 28436, 28388,
+    28340, 28292, 28244, 28197, 28150, 28103, 28056, 28008, 27962, 27915, 27869, 27823, 27777, 27731, 27685, 27640,
+    27594, 27549, 27504, 27459, 27413, 27369, 27324, 27280, 27236, 27192, 27148, 27104, 27060, 27016, 26973, 26930,
+    26887, 26844, 26801, 26758, 26715, 26673, 26630, 26588, 26546, 26504, 26463, 26421, 26380, 26338, 26297, 26255,
+    26214, 26174, 26132, 26092, 26051, 26011, 25971, 25931, 25891, 25851, 25811, 25772, 25732, 25693, 25653, 25614,
+    25575, 25536, 25497, 25458, 25420, 25381, 25343, 25305, 25267, 25229, 25191, 25153, 25116, 25078, 25041, 25003,
+    24966, 24928, 24892, 24855, 24818, 24781, 24745, 24709, 24672, 24636, 24600, 24564, 24528, 24492, 24457, 24421,
+    24385, 24350, 24315, 24280, 24245, 24210, 24175, 24140, 24105, 24070, 24036, 24002, 23967, 23933, 23899, 23865,
+    23831, 23798, 23764, 23730, 23697, 23664, 23630, 23597, 23564, 23530, 23498, 23465, 23432, 23399, 23366, 23334,
+    23302, 23269, 23237, 23205, 23173, 23141, 23109, 23077, 23046, 23014, 22982, 22951, 22920, 22888, 22857, 22826,
+    22795, 22764, 22733, 22703, 22672, 22641, 22611, 22580, 22550, 22520, 22490, 22459, 22429, 22400, 22370, 22340,
+    22310, 22281, 22251, 22221, 22192, 22163, 22134, 22104, 22075, 22046, 22017, 21988, 21959, 21931, 21902, 21874,
+    21845, 21817, 21788, 21760, 21732, 21704, 21676, 21648, 21620, 21592, 21565, 21537, 21509, 21482, 21455, 21427,
+    21400, 21372, 21345, 21318, 21291, 21264, 21237, 21210, 21183, 21157, 21130, 21103, 21077, 21050, 21024, 20998,
+    20971, 20945, 20919, 20893, 20867, 20841, 20816, 20790, 20764, 20738, 20713, 20687, 20662, 20636, 20611, 20586,
+    20560, 20535, 20510, 20485, 20460, 20435, 20410, 20385, 20360, 20336, 20311, 20287, 20262, 20238, 20213, 20189,
+    20165, 20141, 20117, 20092, 20068, 20044, 20021, 19997, 19973, 19949, 19926, 19902, 19878, 19855, 19832, 19808,
+    19784, 19762, 19738, 19715, 19692, 19668, 19645, 19622, 19600, 19577, 19553, 19531, 19508, 19485, 19463, 19440,
+    19418, 19395, 19373, 19351, 19328, 19306, 19284, 19262, 19240, 19218, 19196, 19174, 19152, 19130, 19109, 19087,
+    19065, 19044, 19022, 19000, 18979, 18958, 18936, 18915, 18893, 18872, 18851, 18830, 18809, 18787, 18766, 18745,
+    18725, 18704, 18682, 18662, 18641, 18620, 18600, 18579, 18559, 18538, 18518, 18497, 18477, 18457, 18436, 18416,
+    18396, 18376, 18356, 18336, 18316, 18296, 18276, 18256, 18236, 18216, 18197, 18177, 18157, 18138, 18118, 18099,
+    18079, 18059, 18040, 18021, 18001, 17982, 17963, 17944, 17924, 17905, 17886, 17867, 17848, 17829, 17810, 17791,
+    17772, 17754, 17735, 17716, 17697, 17679, 17660, 17641, 17623, 17604, 17586, 17568, 17549, 17531, 17513, 17494,
+    17476, 17458, 17440, 17422, 17404, 17386, 17368, 17350, 17332, 17314, 17296, 17278, 17261, 17243, 17225, 17208,
+    17190, 17172, 17155, 17137, 17120, 17102, 17085, 17067, 17050, 17033, 17015, 16999, 16981, 16964, 16947, 16930,
+    16913, 16895, 16878, 16862, 16845, 16828, 16810, 16794, 16777, 16760, 16743, 16727, 16710, 16693, 16677, 16660,
+    16644, 16627, 16611, 16594, 16578, 16562, 16545, 16529, 16513, 16497, 16480, 16464, 16448, 16432, 16416, 16400,
+    16384};

+ 90 - 0
Tests/UnitTest/TestCases/Common/Softmax/softmax_int8_to_int16_template.json

@@ -0,0 +1,90 @@
+{
+  "version": 3,
+  "operator_codes": [
+    {
+      "deprecated_builtin_code": 25,
+      "version": 2,
+      "builtin_code": "SOFTMAX"
+    }
+  ],
+  "subgraphs": [
+    {
+      "tensors": [
+        {
+          "shape": [
+            num_rows,
+            row_size
+          ],
+          "type": "INT8",
+          "buffer": 1,
+          "name": "softmax_input",
+          "quantization": {
+            "scale": [
+              input_scale
+            ],
+            "zero_point": [
+              input_zp
+            ]
+          }
+        },
+        {
+          "shape": [
+            num_rows,
+            row_size
+          ],
+          "type": "INT16",
+          "buffer": 2,
+          "name": "softmax_output",
+          "quantization": {
+            "scale": [
+              0.0000152587890625
+            ],
+            "zero_point": [
+              -32768
+            ]
+          }
+        }
+      ],
+      "inputs": [
+        0
+      ],
+      "outputs": [
+        1
+      ],
+      "operators": [
+        {
+          "inputs": [
+            0
+          ],
+          "outputs": [
+            1
+          ],
+          "builtin_options_type": "SoftmaxOptions",
+          "builtin_options": {
+            "beta": 1.0
+          }
+        }
+      ],
+      "name": "main"
+    }
+  ],
+  "description": "MLIR Converted.",
+  "buffers": [
+    {
+    },
+    {
+    },
+    {
+    },
+    {
+      "data": [
+      ]
+    }
+  ],
+  "metadata": [
+    {
+      "name": "min_runtime_version",
+      "buffer": 3
+    }
+  ]
+}

+ 1 - 1
Tests/UnitTest/TestCases/TestData/softmax/input_data.h

@@ -2,4 +2,4 @@
 #pragma once
 #include <stdint.h>
 
-const int8_t softmax_input[5] = {43, 49, 71, 1, 64};
+const q7_t softmax_input[5] = {-99, -122, -67, 10, -82};

+ 1 - 1
Tests/UnitTest/TestCases/TestData/softmax/output_ref_data.h

@@ -2,4 +2,4 @@
 #pragma once
 #include <stdint.h>
 
-const q7_t softmax_output_ref[5] = {-78, -76, -72, -85, -73};
+const q7_t softmax_output_ref[5] = {-83, -87, -77, -59, -80};

+ 7 - 0
Tests/UnitTest/TestCases/TestData/softmax_s16/config_data.h

@@ -0,0 +1,7 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define SOFTMAX_S16_NUM_ROWS 1
+#define SOFTMAX_S16_ROW_SIZE 10
+#define SOFTMAX_S16_INPUT_MULT 1718013132
+#define SOFTMAX_S16_INPUT_LEFT_SHIFT -2
+#define SOFTMAX_S16_DST_SIZE 10

+ 5 - 0
Tests/UnitTest/TestCases/TestData/softmax_s16/input_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t softmax_s16_input[10] = {1847, 28314, -15399, -21445, 23646, -32766, 29521, -5296, -1753, -16004};

+ 5 - 0
Tests/UnitTest/TestCases/TestData/softmax_s16/output_ref_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t softmax_s16_output_ref[10] = {2920, 6547, 1725, 1434, 5678, 1015, 6793, 2347, 2616, 1693};

+ 4 - 0
Tests/UnitTest/TestCases/TestData/softmax_s16/test_data.h

@@ -0,0 +1,4 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input_data.h"
+#include "output_ref_data.h"

+ 8 - 0
Tests/UnitTest/TestCases/TestData/softmax_s8_s16/config_data.h

@@ -0,0 +1,8 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#define SOFTMAX_S8_S16_NUM_ROWS 1
+#define SOFTMAX_S8_S16_ROW_SIZE 12
+#define SOFTMAX_S8_S16_INPUT_MULT 1078071151
+#define SOFTMAX_S8_S16_INPUT_LEFT_SHIFT 19
+#define SOFTMAX_S8_S16_DIFF_MIN -3968
+#define SOFTMAX_S8_S16_DST_SIZE 12

+ 5 - 0
Tests/UnitTest/TestCases/TestData/softmax_s8_s16/input_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q7_t softmax_s8_s16_input[12] = {-50, 22, 59, -69, 15, 31, -73, 74, -57, -79, -107, 19};

+ 6 - 0
Tests/UnitTest/TestCases/TestData/softmax_s8_s16/output_ref_data.h

@@ -0,0 +1,6 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const q15_t softmax_s8_s16_output_ref[12] =
+    {-28076, -26545, -25573, -28413, -26714, -26322, -28481, -25138, -28203, -28581, -29016, -26618};

+ 4 - 0
Tests/UnitTest/TestCases/TestData/softmax_s8_s16/test_data.h

@@ -0,0 +1,4 @@
+// Generated by generate_test_data.py using TFL version 2.6.0 as reference.
+#include "config_data.h"
+#include "input_data.h"
+#include "output_ref_data.h"

+ 23 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s16/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_softmax_s16)
+
+target_sources(test_arm_softmax_s16 PRIVATE
+    Unity/unity_test_arm_softmax_s16.c
+    Unity/TestRunner/unity_test_arm_softmax_s16_runner.c)

+ 47 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s16/Unity/unity_test_arm_softmax_s16.c

@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_softmax_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_softmax_s16_arm_softmax_s16(void) { softmax_s16_arm_softmax_s16(); }

+ 45 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s16/test_arm_softmax_s16.c

@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "unity.h"
+#include <arm_nnfunctions.h>
+
+#include "../Common/Softmax/exp_lut_data.h"
+#include "../Common/Softmax/one_by_one_lut_data.h"
+#include "../TestData/softmax_s16/test_data.h"
+#include "../Utils/validate.h"
+
+#define REPEAT_NUM (2)
+
+void softmax_s16_arm_softmax_s16(void)
+{
+    const int32_t num_rows = SOFTMAX_S16_NUM_ROWS;
+    const int32_t row_size = SOFTMAX_S16_ROW_SIZE;
+    const int32_t mult = SOFTMAX_S16_INPUT_MULT;
+    const int32_t shift = SOFTMAX_S16_INPUT_LEFT_SHIFT;
+    const q15_t *input_data = softmax_s16_input;
+    const cmsis_nn_softmax_lut_s16 softmax_params = {.exp_lut = softmax_s16_exp_lut,
+                                                     .one_by_one_lut = softmax_s16_one_by_one_lut};
+    int16_t output[SOFTMAX_S16_DST_SIZE];
+
+    for (int i = 0; i < REPEAT_NUM; i++)
+    {
+        arm_softmax_s16(input_data, num_rows, row_size, mult, shift, &softmax_params, output);
+        TEST_ASSERT_TRUE(validate_s16(output, softmax_s16_output_ref, SOFTMAX_S16_DST_SIZE));
+    }
+}

+ 8 - 2
Tests/UnitTest/TestCases/test_arm_softmax_s8/test_arm_softmax_s8.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -43,7 +43,6 @@ void softmax_arm_softmax_s8(void)
 
 void softmax_invalid_diff_min_arm_softmax_s8(void)
 {
-    const q7_t softmax_expect_invalid_output[] = {-128, -128, -128, -128, -128};
     const int32_t num_rows = SOFTMAX_NUM_ROWS;
     const int32_t row_size = SOFTMAX_ROW_SIZE;
     const int32_t mult = SOFTMAX_INPUT_MULT;
@@ -52,9 +51,16 @@ void softmax_invalid_diff_min_arm_softmax_s8(void)
     const q7_t *input_data = softmax_input;
     int8_t output[SOFTMAX_DST_SIZE];
 
+    q7_t *softmax_expect_invalid_output = malloc(SOFTMAX_DST_SIZE);
+    for (int i = 0; i < SOFTMAX_DST_SIZE; i++)
+    {
+        softmax_expect_invalid_output[i] = -128;
+    }
+
     for (int i = 0; i < REPEAT_NUM; i++)
     {
         arm_softmax_s8(input_data, num_rows, row_size, mult, shift, diff_min, output);
         TEST_ASSERT_TRUE(validate(output, softmax_expect_invalid_output, SOFTMAX_DST_SIZE));
     }
+    free(softmax_expect_invalid_output);
 }

+ 23 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/CMakeLists.txt

@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2010-2022 Arm Limited or its affiliates.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_cmsis_nn_unit_test_executable(test_arm_softmax_s8_s16)
+
+target_sources(test_arm_softmax_s8_s16 PRIVATE
+    Unity/unity_test_arm_softmax_s8_s16.c
+    Unity/TestRunner/unity_test_arm_softmax_s8_s16_runner.c)

+ 49 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/Unity/unity_test_arm_softmax_s8_s16.c

@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../test_arm_softmax_s8_s16.c"
+#include "unity.h"
+
+#ifdef USING_FVP_CORSTONE_300
+extern void uart_init(void);
+#endif
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void setUp(void)
+{ /* This is run before EACH TEST */
+#ifdef USING_FVP_CORSTONE_300
+    uart_init();
+#endif
+}
+
+/* This function is called from the autogenerated file.
+ * The name must be exactly like this
+ */
+void tearDown(void) {}
+
+void test_softmax_arm_softmax_s8_s16(void) { softmax_s8_s16_arm_softmax_s8_s16(); }
+
+void test_softmax1_arm_softmax_s8_s16(void) { softmax_s8_s16_invalid_diff_min_arm_softmax_s8_s16(); }

+ 66 - 0
Tests/UnitTest/TestCases/test_arm_softmax_s8_s16/test_arm_softmax_s8_s16.c

@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "unity.h"
+#include <arm_nnfunctions.h>
+
+#include "../TestData/softmax_s8_s16/test_data.h"
+#include "../Utils/validate.h"
+
+#define REPEAT_NUM (2)
+
+void softmax_s8_s16_arm_softmax_s8_s16(void)
+{
+    const int32_t num_rows = SOFTMAX_S8_S16_NUM_ROWS;
+    const int32_t row_size = SOFTMAX_S8_S16_ROW_SIZE;
+    const int32_t mult = SOFTMAX_S8_S16_INPUT_MULT;
+    const int32_t shift = SOFTMAX_S8_S16_INPUT_LEFT_SHIFT;
+    const int32_t diff_min = SOFTMAX_S8_S16_DIFF_MIN;
+    const q7_t *input_data = softmax_s8_s16_input;
+    int16_t output[SOFTMAX_S8_S16_DST_SIZE];
+
+    for (int i = 0; i < REPEAT_NUM; i++)
+    {
+        arm_softmax_s8_s16(input_data, num_rows, row_size, mult, shift, diff_min, output);
+        TEST_ASSERT_TRUE(validate_s16(output, softmax_s8_s16_output_ref, SOFTMAX_S8_S16_DST_SIZE));
+    }
+}
+
+void softmax_s8_s16_invalid_diff_min_arm_softmax_s8_s16(void)
+{
+    const int32_t num_rows = SOFTMAX_S8_S16_NUM_ROWS;
+    const int32_t row_size = SOFTMAX_S8_S16_ROW_SIZE;
+    const int32_t mult = SOFTMAX_S8_S16_INPUT_MULT;
+    const int32_t shift = SOFTMAX_S8_S16_INPUT_LEFT_SHIFT;
+    const int32_t diff_min = 0x7FFFFFFF;
+    const q7_t *input_data = softmax_s8_s16_input;
+    int16_t output[SOFTMAX_S8_S16_DST_SIZE];
+
+    q15_t *softmax_s8_s16_expect_invalid_output = malloc(SOFTMAX_S8_S16_DST_SIZE * sizeof(q15_t));
+    for (int i = 0; i < SOFTMAX_S8_S16_DST_SIZE; i++)
+    {
+        softmax_s8_s16_expect_invalid_output[i] = -32768;
+    }
+
+    for (int i = 0; i < REPEAT_NUM; i++)
+    {
+        arm_softmax_s8_s16(input_data, num_rows, row_size, mult, shift, diff_min, output);
+        TEST_ASSERT_TRUE(validate_s16(output, softmax_s8_s16_expect_invalid_output, SOFTMAX_S8_S16_DST_SIZE));
+    }
+    free(softmax_s8_s16_expect_invalid_output);
+}

+ 131 - 75
Tests/UnitTest/generate_test_data.py

@@ -39,6 +39,7 @@ except Exception as e:
 REQUIRED_MINIMUM_TENSORFLOW_VERSION = version.parse("2.5")
 ALL_TESTDATA_SETS = {}
 CLANG_FORMAT = 'clang-format-9 -i'
+
 INT32_MAX = 2147483647
 INT32_MIN = -2147483648
 INT16_MAX = 32767
@@ -81,7 +82,7 @@ class TestSettings(ABC):
     # It is also convenient when testing changes in the script, to be able to run all test sets again.
     PREGEN = 'PregeneratedData/'
 
-    def __init__(self, dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x, stride_y, pad,
+    def __init__(self, dataset, testtype, args, in_ch, out_ch, x_in, y_in, w_x, w_y, stride_x=1, stride_y=1, pad=False,
                  randmin=INT8_MIN, randmax=INT8_MAX, batches=1, generate_bias=True, relu6=False,
                  out_activation_min=None, out_activation_max=None, int16xint8=False, bias_min=None, bias_max=None,
                  dilation_x=1, dilation_y=1):
@@ -417,6 +418,63 @@ class TestSettings(ABC):
 
         return interpreter
 
+    def generate_json_from_template(self, weights_feature_data=None, weights_time_data=None, bias_data=None):
+        """
+        Takes a json template and parameters as input and creates a new json file.
+        """
+        generated_json_file = self.model_path + '.json'
+
+        with open(self.json_template, 'r') as in_file, open(generated_json_file, 'w') as out_file:
+            # Update shapes, scales and zero points
+            data = in_file.read()
+            for item, to_replace in self.json_replacements.items():
+                data = data.replace(item, str(to_replace))
+
+            data = json.loads(data)
+
+            # Update weights and bias data
+            if weights_feature_data is not None:
+                w_1_buffer_index = 1
+                data["buffers"][w_1_buffer_index]["data"] = self.to_bytes(weights_feature_data.numpy().ravel(), 1)
+            if weights_time_data is not None:
+                w_2_buffer_index = 2
+                data["buffers"][w_2_buffer_index]["data"] = self.to_bytes(weights_time_data.numpy().ravel(), 2)
+            if bias_data is not None:
+                bias_buffer_index = 3
+                data["buffers"][bias_buffer_index]["data"] = self.to_bytes(bias_data.numpy().ravel(), 4)
+
+            json.dump(data, out_file, indent=2)
+
+        return generated_json_file
+
+    def flatc_generate_tflite(self, json_input, schema):
+        flatc = 'flatc'
+        if schema is None:
+            raise RuntimeError("A schema file is required.")
+        command = "{} -o {} -c -b {} {}".format(flatc, self.headers_dir, schema, json_input)
+        command_list = command.split(' ')
+        process = subprocess.run(command_list)
+        if process.returncode != 0:
+            raise RuntimeError("The following command failed: {}. Did you install flatc?".format(command))
+
+    def to_bytes(self, tensor_data, type_size):
+        result_bytes = []
+
+        if type_size == 1:
+            tensor_type = np.uint8
+        elif type_size == 2:
+            tensor_type = np.uint16
+        elif type_size == 4:
+            tensor_type = np.uint32
+        else:
+            raise RuntimeError("Size not supported: {}".format(type_size))
+
+        for val in tensor_data:
+            for byte in int(tensor_type(val)).to_bytes(type_size, 'little'):
+                result_bytes.append(byte)
+
+        return result_bytes
+
 
 class ConvSettings(TestSettings):
 
@@ -723,21 +781,38 @@ class FullyConnectedSettings(TestSettings):
 class SoftmaxSettings(TestSettings):
     softmax_input_integer_bits = 5
 
-    def __init__(self, dataset, testtype, args, x_in=5, y_in=1, randmin=INT8_MIN, randmax=INT8_MAX):
+    def __init__(self, dataset, testtype, args, x_in=5, y_in=1, randmin=INT8_MIN, randmax=INT8_MAX, int16xint8=False,
+                 inInt8outInt16=False, input_scale=0.003922, input_zp=-128):
         super().__init__(dataset, testtype, args, 1, 1, x_in, y_in, 1, 1, 1, 1, False, randmin,
-                         randmax)
+                         randmax, int16xint8=int16xint8)
         self.x_input = self.x_output = x_in
         self.y_input = self.y_output = y_in
+        self.inInt8outInt16 = inInt8outInt16
+
+        if self.inInt8outInt16 and self.is_int16xint8:
+            raise RuntimeError("Specify input as either s8 or s16")
+
+        if self.inInt8outInt16:
+            self.input_scale = input_scale
+            self.json_template = "TestCases/Common/Softmax/softmax_int8_to_int16_template.json"
+            self.json_replacements = {"num_rows": self.y_input,
+                                      "row_size": self.x_input,
+                                      "input_scale": input_scale,
+                                      "input_zp": input_zp}
 
     def calc_softmax_params(self):
-        input_real_multiplier = min(self.input_scale * (1 << (31 - self.softmax_input_integer_bits)),
-                                    (1 << 31) - 1)
-        (self.input_multiplier, self.input_left_shift) = self.quantize_scale(input_real_multiplier)
+        if self.is_int16xint8:
+            input_scale_beta_rescale = self.input_scale / (10.0 / 65535.0)
+            (self.input_multiplier, self.input_left_shift) = self.quantize_scale(input_scale_beta_rescale)
+        else:
+            input_real_multiplier = min(self.input_scale * (1 << (31 - self.softmax_input_integer_bits)),
+                                        (1 << 31) - 1)
+            (self.input_multiplier, self.input_left_shift) = self.quantize_scale(input_real_multiplier)
 
-        self.diff_min = ((1 << self.softmax_input_integer_bits) - 1) * \
-                        (1 << (31 - self.softmax_input_integer_bits)) / \
-                        (1 << self.input_left_shift)
-        self.diff_min = math.floor(self.diff_min)
+            self.diff_min = ((1 << self.softmax_input_integer_bits) - 1) * \
+                            (1 << (31 - self.softmax_input_integer_bits)) / \
+                            (1 << self.input_left_shift)
+            self.diff_min = math.floor(self.diff_min)
 
     def write_c_config_header(self):
         super().write_c_config_header(write_common_parameters=False)
@@ -751,7 +826,8 @@ class SoftmaxSettings(TestSettings):
             f.write("#define {}_ROW_SIZE {}\n".format(prefix, self.x_input))
             f.write("#define {}_INPUT_MULT {}\n".format(prefix, self.input_multiplier))
             f.write("#define {}_INPUT_LEFT_SHIFT {}\n".format(prefix, self.input_left_shift))
-            f.write("#define {}_DIFF_MIN {}\n".format(prefix, -self.diff_min))
+            if not self.is_int16xint8:
+                f.write("#define {}_DIFF_MIN {}\n".format(prefix, -self.diff_min))
             f.write("#define {}_DST_SIZE {}\n".format(prefix, self.x_output * self.y_output))
 
     def get_softmax_randomized_input_data(self, input_data, input_shape):
@@ -766,22 +842,49 @@ class SoftmaxSettings(TestSettings):
 
     def generate_data(self, input_data=None, weights=None, biases=None):
         input_data = self.get_softmax_randomized_input_data(input_data, [self.y_input, self.x_input])
-        self.generate_c_array("input", input_data, datatype="int8_t")
 
-        # Create a one-layer Keras model.
-        model = tf.keras.models.Sequential()
-        input_shape = (self.y_input, self.x_input)
-        model.add(tf.keras.layers.Softmax(input_shape=input_shape[1:]))
+        if self.is_int16xint8:
+            inttype = tf.int16
+            datatype = "q15_t"
+        else:
+            inttype = tf.int8
+            datatype = "q7_t"
+
+        self.generate_c_array("input", input_data, datatype=datatype)
 
-        interpreter = self.convert_and_interpret(model, tf.int8, input_data)
+        # Generate reference.
+        if self.inInt8outInt16:
+            # Output is int16.
+            datatype = "q15_t"
 
-        self.calc_softmax_params()
+            # Keras does not support int8 input and int16 output for Softmax.
+            # Using a template json instead.
+            generated_json = self.generate_json_from_template()
+            self.flatc_generate_tflite(generated_json, args.schema_file)
 
-        # Generate reference
-        output_details = interpreter.get_output_details()
-        interpreter.invoke()
-        output_data = interpreter.get_tensor(output_details[0]["index"])
-        self.generate_c_array("output_ref", output_data)
+            interpreter = Interpreter(
+                model_path=str(self.model_path_tflite), experimental_op_resolver_type=OpResolverType.BUILTIN_REF)
+            interpreter.allocate_tensors()
+            all_layers_details = interpreter.get_tensor_details()
+            input_layer = all_layers_details[0]
+            output_layer = all_layers_details[1]
+
+            interpreter.set_tensor(input_layer["index"], tf.cast(input_data, tf.int8))
+            interpreter.invoke()
+            output_data = interpreter.get_tensor(output_layer["index"])
+        else:
+            # Create a one-layer Keras model.
+            model = tf.keras.models.Sequential()
+            input_shape = (self.y_input, self.x_input)
+            model.add(tf.keras.layers.Softmax(input_shape=input_shape[1:]))
+
+            interpreter = self.convert_and_interpret(model, inttype, input_data)
+            output_details = interpreter.get_output_details()
+            interpreter.invoke()
+            output_data = interpreter.get_tensor(output_details[0]["index"])
+
+        self.calc_softmax_params()
+        self.generate_c_array("output_ref", output_data, datatype=datatype)
 
         self.write_c_config_header()
         self.write_c_header_wrapper()
@@ -940,61 +1043,9 @@ class SVDFSettings(TestSettings):
         self.write_c_config_header()
         self.write_c_header_wrapper()
 
-    def flatc_generate_tflite(self, json_input, schema):
-        flatc = 'flatc'
-        if schema is None:
-            raise RuntimeError("A schema file is required.")
-        command = "{} -o {} -c -b {} {}".format(flatc, self.headers_dir, schema, json_input)
-        command_list = command.split(' ')
-        process = subprocess.run(command_list)
-        if process.returncode != 0:
-            raise RuntimeError("The following command failed: {}. Did you install flatc?".format(command))
-
     def get_scale_and_zp(self, layer):
         return (layer['quantization_parameters']['scales'][0], layer['quantization_parameters']['zero_points'][0])
 
-    def to_bytes(self, tensor_data, type_size):
-        result_bytes = []
-
-        if type_size == 1:
-            tensor_type = np.uint8
-        elif type_size == 2:
-            tensor_type = np.uint16
-        elif type_size == 4:
-            tensor_type = np.uint32
-        else:
-            raise RuntimeError("Size not supported: {}".format(type_size))
-
-        for val in tensor_data:
-            for byte in int(tensor_type(val)).to_bytes(type_size, 'little'):
-                result_bytes.append(byte)
-
-        return result_bytes
-
-    def generate_json_from_template(self, weights_feature_data, weights_time_data, bias_data):
-        """
-        Takes a json template and parameters as input and creates a new json file.
-        """
-        w_1_buffer_index = 1
-        w_2_buffer_index = 2
-        bias_buffer_index = 3
-        generated_json_file = self.model_path + '.json'
-
-        with open(self.json_template, 'r') as in_file, open(generated_json_file, 'w') as out_file:
-            # Update shapes, scales and zero points
-            data = in_file.read()
-            for item, to_replace in self.json_replacements.items():
-                data = data.replace(item, str(to_replace))
-
-            # Update weights and bias data
-            data = json.loads(data)
-            data["buffers"][w_1_buffer_index]["data"] = self.to_bytes(weights_feature_data.numpy().ravel(), 1)
-            data["buffers"][w_2_buffer_index]["data"] = self.to_bytes(weights_time_data.numpy().ravel(), 2)
-            data["buffers"][bias_buffer_index]["data"] = self.to_bytes(bias_data.numpy().ravel(), 4)
-            json.dump(data, out_file, indent=2)
-
-        return generated_json_file
-
 
 class AddMulSettings(TestSettings):
 
@@ -1341,6 +1392,11 @@ def load_all_testdatasets():
     type_of_test = 'softmax'
     dataset = 'softmax'
     ALL_TESTDATA_SETS[dataset] = SoftmaxSettings(dataset, type_of_test, args, x_in=5, y_in=1)
+    dataset = 'softmax_s16'
+    ALL_TESTDATA_SETS[dataset] = SoftmaxSettings(dataset, type_of_test, args, x_in=10, y_in=1, int16xint8=True,
+                                                 randmin=INT16_MIN, randmax=INT16_MAX)
+    dataset = 'softmax_s8_s16'
+    ALL_TESTDATA_SETS[dataset] = SoftmaxSettings(dataset, type_of_test, args, x_in=12, y_in=1, inInt8outInt16=True)
 
     type_of_test = 'svdf'
     dataset = 'svdf'