Przeglądaj źródła

CMSIS-NN: Optimize elementwise s16 variants of add/mul (#1482)

* CMSIS-NN: Optimize elementwise s16 variants of add/mul

Change-Id: I6c37773e014e44959644cc59ad7255245290fbd4

* CMSIS-NN: Update README for add/mul int16
Måns Nilsson 3 lat temu
rodzic
commit
d2d6d92c2f
23 zmienionych plików z 413 dodań i 12 usunięć
  1. 7 2
      Include/arm_nnsupportfunctions.h
  2. 2 2
      README.md
  3. 35 4
      Source/BasicMathFunctions/arm_elementwise_add_s16.c
  4. 31 4
      Source/BasicMathFunctions/arm_elementwise_mul_s16.c
  5. 16 0
      Tests/UnitTest/PregeneratedData/add_s16_spill/input.txt
  6. 16 0
      Tests/UnitTest/PregeneratedData/add_s16_spill/kernel.txt
  7. 36 0
      Tests/UnitTest/PregeneratedData/mul_s16_spill/input.txt
  8. 36 0
      Tests/UnitTest/PregeneratedData/mul_s16_spill/kernel.txt
  9. 15 0
      Tests/UnitTest/TestCases/TestData/add_s16_spill/config_data.h
  10. 13 0
      Tests/UnitTest/TestCases/TestData/add_s16_spill/input1_data.h
  11. 13 0
      Tests/UnitTest/TestCases/TestData/add_s16_spill/input2_data.h
  12. 12 0
      Tests/UnitTest/TestCases/TestData/add_s16_spill/output_ref_data.h
  13. 5 0
      Tests/UnitTest/TestCases/TestData/add_s16_spill/test_data.h
  14. 10 0
      Tests/UnitTest/TestCases/TestData/mul_s16_spill/config_data.h
  15. 23 0
      Tests/UnitTest/TestCases/TestData/mul_s16_spill/input1_data.h
  16. 23 0
      Tests/UnitTest/TestCases/TestData/mul_s16_spill/input2_data.h
  17. 23 0
      Tests/UnitTest/TestCases/TestData/mul_s16_spill/output_ref_data.h
  18. 5 0
      Tests/UnitTest/TestCases/TestData/mul_s16_spill/test_data.h
  19. 1 0
      Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c
  20. 46 0
      Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c
  21. 2 0
      Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c
  22. 35 0
      Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c
  23. 8 0
      Tests/UnitTest/generate_test_data.py

+ 7 - 2
Include/arm_nnsupportfunctions.h

@@ -21,8 +21,8 @@
  * Title:        arm_nnsupportfunctions.h
  * Description:  Public header file of support functions for CMSIS NN Library
  *
- * $Date:        20. April 2022
- * $Revision:    V.8.0.0
+ * $Date:        10 May 2022
+ * $Revision:    V.8.1.0
  *
  * Target Processor:  Cortex-M CPUs
  * -------------------------------------------------------------------- */
@@ -57,6 +57,11 @@ extern "C" {
     ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) |                     \
      (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000))
 
+/**
+ * @brief Pack two 16-bit values into one 32-bit word: v0 masked into bits [15:0], v1 shifted into bits [31:16].
+ */
+#define PACK_Q15x2_32x1(v0, v1) (((int32_t)v0 & (int32_t)0xFFFF) | ((int32_t)v1 << 16))
+
 /**
  * @brief Union for SIMD access of q31/q15/q7 types
  */

+ 2 - 2
README.md

@@ -57,9 +57,9 @@ Group | API | Base Operator | Input Constraints | Additional memory required for
 |[Misc](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| |  ||
 ||arm_reshape_s8()| SOFTMAX | None | None | No | No | |
 ||arm_elementwise_add_s8()| ELEMENTWISE ADD | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
-||arm_elementwise_add_s16()| ELEMENTWISE ADD | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_add_s16()| ELEMENTWISE ADD | None | None | Yes| No| Reshape is not done in this function <br/> Only minor improvements are expected |
 ||arm_elementwise_mul_s8()| ELEMENTWISE MUL | None | None | Yes| Yes| Reshape is not done in this function <br/> Only minor improvements are expected |
-||arm_elementwise_mul_s16()| ELEMENTWISE MUL | None | None | No| No| Reshape is not done in this function <br/> Only minor improvements are expected |
+||arm_elementwise_mul_s16()| ELEMENTWISE MUL | None | None | Yes| No| Reshape is not done in this function <br/> Only minor improvements are expected |
 ||arm_relu_q7() | RELU | None | None | Yes| No|
 ||arm_relu6_s8() | RELU | None | None | Yes| No|
 |[Concat](https://arm-software.github.io/CMSIS_5/NN/html/group__groupNN.html)||||| |  ||

+ 35 - 4
Source/BasicMathFunctions/arm_elementwise_add_s16.c

@@ -21,8 +21,8 @@
  * Title:        arm_elementwise_add_s16
  * Description:  Elementwise add
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        10 May 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M CPUs
  *
@@ -69,12 +69,43 @@ arm_cmsis_nn_status arm_elementwise_add_s16(const int16_t *input_1_vect,
     (void)input_1_offset;
     (void)input_2_offset;
     (void)out_offset;
-    int32_t loop_count;
     int32_t input_1;
     int32_t input_2;
     int32_t sum;
+    int32_t two_halfword_1, two_halfword_2;
+    int16_t sum_1, sum_2;
+    int32_t loop_count = block_size / 2;
 
-    loop_count = block_size;
+    while (loop_count > 0)
+    {
+        two_halfword_1 = arm_nn_read_q15x2_ia(&input_1_vect);
+        two_halfword_2 = arm_nn_read_q15x2_ia(&input_2_vect);
+
+        input_1 = (int16_t)(two_halfword_1 & 0xFFFF) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = (int16_t)(two_halfword_2 & 0xFFFF) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
+        sum = MAX(sum, out_activation_min);
+        sum = MIN(sum, out_activation_max);
+        sum_1 = (int16_t)sum;
+
+        input_1 = (int16_t)(two_halfword_1 >> 16) << left_shift;
+        input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
+        input_2 = (int16_t)(two_halfword_2 >> 16) << left_shift;
+        input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
+        sum = input_1 + input_2;
+        sum = arm_nn_requantize(sum, out_mult, out_shift);
+        sum = MAX(sum, out_activation_min);
+        sum = MIN(sum, out_activation_max);
+        sum_2 = (int16_t)sum;
+
+        arm_nn_write_q15x2_ia(&output, PACK_Q15x2_32x1(sum_1, sum_2));
+
+        loop_count--;
+    }
+    loop_count = block_size & 0x1;
 
     while (loop_count > 0)
     {

+ 31 - 4
Source/BasicMathFunctions/arm_elementwise_mul_s16.c

@@ -21,8 +21,8 @@
  * Title:        arm_elementwise_mul_s16
  * Description:  Element wise multiplication
  *
- * $Date:        19 April 2022
- * $Revision:    V.2.0.0
+ * $Date:        10 May 2022
+ * $Revision:    V.2.1.0
  *
  * Target Processor:  Cortex-M cores
  *
@@ -61,12 +61,39 @@ arm_cmsis_nn_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
     (void)input_1_offset;
     (void)input_2_offset;
     (void)out_offset;
-    int32_t loop_count;
     int32_t input_1;
     int32_t input_2;
     int32_t mul_res;
+    int32_t two_halfword_1, two_halfword_2;
+    int16_t mul_1, mul_2;
+    int32_t loop_count = block_size / 2;
 
-    loop_count = block_size;
+    while (loop_count > 0)
+    {
+        two_halfword_1 = arm_nn_read_q15x2_ia(&input_1_vect);
+        two_halfword_2 = arm_nn_read_q15x2_ia(&input_2_vect);
+
+        input_1 = (int16_t)(two_halfword_1 & 0xFFFF);
+        input_2 = (int16_t)(two_halfword_2 & 0xFFFF);
+        mul_res = input_1 * input_2;
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift);
+        mul_res = MAX(mul_res, out_activation_min);
+        mul_res = MIN(mul_res, out_activation_max);
+        mul_1 = (int16_t)mul_res;
+
+        input_1 = (int16_t)(two_halfword_1 >> 16);
+        input_2 = (int16_t)(two_halfword_2 >> 16);
+        mul_res = input_1 * input_2;
+        mul_res = arm_nn_requantize(mul_res, out_mult, out_shift);
+        mul_res = MAX(mul_res, out_activation_min);
+        mul_res = MIN(mul_res, out_activation_max);
+        mul_2 = (int16_t)mul_res;
+
+        arm_nn_write_q15x2_ia(&output, PACK_Q15x2_32x1(mul_1, mul_2));
+
+        loop_count--;
+    }
+    loop_count = block_size & 0x1;
 
     while (loop_count > 0)
     {

+ 16 - 0
Tests/UnitTest/PregeneratedData/add_s16_spill/input.txt

@@ -0,0 +1,16 @@
+# 1,3,5,7
+2.491657421875000000e+04,3.256466015625000000e+04,3.404785156250000000e+03,-8.132375000000000000e+03,-1.381603906250000000e+04,9.168718750000000000e+03,-2.548325976562500000e+04
+3.818503906250000000e+03,7.630148437500000000e+03,-3.166457226562500000e+04,-1.449144335937500000e+04,3.213817187500000000e+04,-2.191388671875000000e+03,-2.896478710937500000e+04
+2.436121484375000000e+04,-6.775755859375000000e+03,-2.621737304687500000e+04,1.050399609375000000e+04,2.995717578125000000e+04,9.877890625000000000e+02,-9.398691406250000000e+02
+9.274781250000000000e+03,5.375410156250000000e+03,1.273482812500000000e+04,1.254899609375000000e+04,-1.302634765625000000e+04,1.710412109375000000e+04,-2.114406835937500000e+04
+2.561275000000000000e+04,-2.829564648437500000e+04,1.158112109375000000e+04,-1.113623046875000000e+03,-1.491258398437500000e+04,-1.805123242187500000e+04,-2.631413671875000000e+04
+1.051169921875000000e+04,2.905102734375000000e+04,2.229036328125000000e+04,-1.487356250000000000e+04,2.763155468750000000e+04,-7.466800781250000000e+03,-1.042131835937500000e+04
+1.682746093750000000e+04,-2.906675195312500000e+04,2.882469140625000000e+04,1.486021093750000000e+04,-1.118407226562500000e+04,-1.477995507812500000e+04,-2.535923828125000000e+04
+9.233632812500000000e+03,-1.277428125000000000e+04,3.186049218750000000e+04,2.393060546875000000e+04,-3.032906250000000000e+03,3.969398437500000000e+03,-4.121347656250000000e+02
+-1.611746484375000000e+04,-4.323957031250000000e+03,-3.279941406250000000e+02,1.829847656250000000e+03,1.520991015625000000e+04,2.347265625000000000e+04,1.307419140625000000e+04
+-2.384478710937500000e+04,-1.354089453125000000e+04,7.147328125000000000e+03,1.219314062500000000e+04,1.303805078125000000e+04,2.716597656250000000e+04,-1.678454101562500000e+04
+-7.536289062500000000e+02,2.255489062500000000e+04,9.677929687500000000e+03,-1.249029296875000000e+04,-1.616174609375000000e+04,-1.613553515625000000e+04,1.733100781250000000e+04
+1.156532421875000000e+04,1.603111328125000000e+04,1.865084765625000000e+04,-6.358558593750000000e+03,-1.287191406250000000e+03,-1.155939453125000000e+04,-1.972720703125000000e+04
+-2.965363281250000000e+04,-5.173976562500000000e+03,-2.485300390625000000e+04,2.782621484375000000e+04,-3.251080859375000000e+04,1.006812207031250000e+04,-6.781230468750000000e+02
+-3.133764648437500000e+04,-2.512620312500000000e+04,-2.684999609375000000e+04,-1.023685156250000000e+04,7.634796875000000000e+03,1.329000781250000000e+04,-1.468734570312500000e+04
+-2.373301953125000000e+04,1.824034765625000000e+04,-3.108977539062500000e+04,1.363425781250000000e+03,-1.081496875000000000e+04,2.637154296875000000e+04,5.802285156250000000e+03

+ 16 - 0
Tests/UnitTest/PregeneratedData/add_s16_spill/kernel.txt

@@ -0,0 +1,16 @@
+# 1,3,5,7
+-5.961931640625000000e+03,3.985640625000000000e+03,-2.296183593750000000e+03,-2.704050195312500000e+04,2.769633593750000000e+04,1.863446484375000000e+04,1.174117187500000000e+04
+-1.106606640625000000e+04,-1.109100976562500000e+04,2.272357421875000000e+04,-1.458834765625000000e+04,-1.066968750000000000e+03,1.912344921875000000e+04,1.366009765625000000e+04
+1.044828515625000000e+04,1.409570312500000000e+03,9.203007812500000000e+03,-2.783186328125000000e+04,-1.239818554687500000e+04,9.167437500000000000e+03,2.394917187500000000e+04
+-1.882967382812500000e+04,1.010665234375000000e+04,-3.047660546875000000e+04,3.007885546875000000e+04,-1.244566992187500000e+04,2.021162890625000000e+04,2.214566015625000000e+04
+2.102085156250000000e+04,4.483773437500000000e+03,4.643968750000000000e+03,-1.301336328125000000e+04,-2.312496875000000000e+04,5.920582031250000000e+03,-8.723867187500000000e+03
+-3.017381250000000000e+04,9.830054687500000000e+03,3.261187890625000000e+04,-7.662414062500000000e+03,1.835483593750000000e+04,-1.229315625000000000e+04,1.224567968750000000e+04
+2.486330859375000000e+04,-1.130268750000000000e+04,3.235581250000000000e+04,2.887628515625000000e+04,-1.079960937500000000e+04,-2.238054101562500000e+04,-1.541396875000000000e+04
+-1.513059765625000000e+04,2.617305468750000000e+04,-1.307353515625000000e+04,-5.763560546875000000e+03,7.922207031250000000e+03,2.081054687500000000e+03,-3.141861523437500000e+04
+-1.430082812500000000e+04,2.085185546875000000e+04,-2.713341796875000000e+03,-1.155044140625000000e+04,-2.292146289062500000e+04,-6.424746093750000000e+02,-3.031541210937500000e+04
+-3.744677734375000000e+03,1.795752343750000000e+04,-2.484580664062500000e+04,2.705458593750000000e+04,-2.966466406250000000e+04,-2.940279492187500000e+04,-1.556841015625000000e+04
+2.269876953125000000e+04,-1.165873632812500000e+04,2.017591015625000000e+04,9.846437500000000000e+03,6.529234375000000000e+03,2.634056640625000000e+04,-1.124532031250000000e+04
+-6.727068359375000000e+03,-4.489369140625000000e+03,2.591300390625000000e+04,-6.331271484375000000e+03,-1.667087109375000000e+04,2.338447265625000000e+04,-2.266330273437500000e+04
+1.861023046875000000e+04,-1.944825000000000000e+04,1.562833203125000000e+04,9.850875000000000000e+03,-8.532519531250000000e+02,2.487134765625000000e+04,-8.248789062500000000e+03
+3.066958593750000000e+04,1.099281250000000000e+03,-2.384208984375000000e+04,6.590257812500000000e+03,-1.721826171875000000e+04,-1.474642480468750000e+04,-2.942682617187500000e+04
+-1.560333203125000000e+04,-6.378628906250000000e+03,1.051419921875000000e+04,-2.224098828125000000e+04,-4.234375381469726562e+01,2.932823828125000000e+04,1.713604296875000000e+04

+ 36 - 0
Tests/UnitTest/PregeneratedData/mul_s16_spill/input.txt

@@ -0,0 +1,36 @@
+# 1,7,5,7
+2.167023828125000000e+04,-2.865405468750000000e+04,1.044138671875000000e+04,-3.077359375000000000e+04,1.004234765625000000e+04,-2.497993164062500000e+04,3.996687500000000000e+03
+1.286549218750000000e+04,-1.223751562500000000e+04,-3.261115820312500000e+04,2.589599609375000000e+04,2.374840234375000000e+04,-2.821582031250000000e+03,3.062621875000000000e+04
+1.098675390625000000e+04,1.349836328125000000e+04,2.091047656250000000e+04,-1.287281054687500000e+04,-2.362382812500000000e+04,1.596613281250000000e+04,-1.924041015625000000e+03
+-1.903501953125000000e+03,2.538301171875000000e+04,-2.057553125000000000e+04,8.559128906250000000e+03,-3.215989257812500000e+04,2.474398046875000000e+04,2.328741015625000000e+04
+2.578194921875000000e+04,-2.892569140625000000e+04,-1.975657421875000000e+04,2.872478906250000000e+04,-2.352600000000000000e+04,4.541304687500000000e+03,-3.036567773437500000e+04
+-8.557791015625000000e+03,1.737088281250000000e+04,-1.041404492187500000e+04,2.200908593750000000e+04,-2.954736914062500000e+04,-1.394223242187500000e+04,-1.593846679687500000e+04
+2.614140820312500000e+04,-5.848316406250000000e+03,-1.941328125000000000e+04,1.612521484375000000e+04,8.303023437500000000e+03,-2.929174023437500000e+04,1.410194140625000000e+04
+-3.134229492187500000e+04,2.805927343750000000e+04,-2.328598046875000000e+04,1.848082812500000000e+04,-2.253886718750000000e+04,3.978078125000000000e+03,-1.892425781250000000e+04
+2.117060546875000000e+04,3.612289062500000000e+03,4.240683593750000000e+03,-3.019317187500000000e+04,-1.157797265625000000e+04,3.000391796875000000e+04,5.486129394531250000e+03
+-3.258609570312500000e+04,-2.497419726562500000e+04,-8.056408203125000000e+03,-2.205613281250000000e+04,-9.557494140625000000e+03,-6.250287109375000000e+03,-1.356505078125000000e+04
+-1.361169140625000000e+04,5.011250000000000000e+02,-1.633478125000000000e+04,1.922534765625000000e+04,-9.965468750000000000e+02,-4.920128906250000000e+03,4.357410156250000000e+03
+-2.684437890625000000e+04,1.095673046875000000e+04,-6.499658203125000000e+03,-2.571089648437500000e+04,2.105219531250000000e+04,1.110387109375000000e+04,-1.399845117187500000e+04
+2.030415234375000000e+04,-1.266313476562500000e+04,4.863105468750000000e+03,2.809841406250000000e+04,-1.891789843750000000e+04,2.992544140625000000e+04,1.170657812500000000e+04
+-1.158843164062500000e+04,1.646212500000000000e+04,2.912812500000000000e+04,1.863809765625000000e+04,-2.374933203125000000e+04,1.553234375000000000e+03,-1.327496875000000000e+04
+-8.304005859375000000e+03,8.066718750000000000e+03,2.250410156250000000e+04,1.684900781250000000e+04,2.365146093750000000e+04,1.533281250000000000e+04,-8.801724609375000000e+03
+-3.243165429687500000e+04,-2.574731054687500000e+04,-2.105723437500000000e+04,3.222935937500000000e+04,6.982011718750000000e+03,-3.044551953125000000e+04,-3.641593750000000000e+03
+-1.927463671875000000e+04,1.105179296875000000e+04,-1.861640234375000000e+04,1.461640625000000000e+03,-5.807794921875000000e+03,-2.506925000000000000e+04,8.580703125000000000e+03
+-1.341628710937500000e+04,1.864491015625000000e+04,-2.884299804687500000e+04,-1.215412695312500000e+04,1.697173437500000000e+04,4.876207031250000000e+03,1.973905078125000000e+04
+1.773744140625000000e+04,-2.146133593750000000e+04,-6.216544921875000000e+03,2.478555859375000000e+04,1.562326269531250000e+04,-2.820939843750000000e+04,-2.860348437500000000e+04
+1.252968359375000000e+04,3.079352343750000000e+04,2.321439453125000000e+04,1.474035156250000000e+04,-2.375846484375000000e+04,-2.356772656250000000e+04,-1.822383007812500000e+04
+8.055652343750000000e+03,-2.733041796875000000e+04,7.573289062500000000e+03,-2.326378515625000000e+04,9.815781250000000000e+03,-2.842483398437500000e+04,1.471158984375000000e+04
+9.247023437500000000e+03,-2.704878320312500000e+04,-8.029595703125000000e+03,1.531577343750000000e+04,-1.992399218750000000e+04,2.553435156250000000e+04,6.214070312500000000e+03
+-2.096272070312500000e+04,-1.932061132812500000e+04,1.351560546875000000e+04,2.195633593750000000e+04,1.930772265625000000e+04,2.454625000000000000e+04,1.367925781250000000e+03
+2.451851562500000000e+04,-1.508042578125000000e+04,-1.760892578125000000e+04,2.465173437500000000e+04,-1.773186328125000000e+04,-1.556745703125000000e+04,2.707745312500000000e+04
+1.621202734375000000e+04,1.150592578125000000e+04,8.137046875000000000e+03,1.021171875000000000e+03,2.979430468750000000e+04,1.441859375000000000e+04,-1.212557226562500000e+04
+-2.052025000000000000e+04,-1.824150390625000000e+04,1.285339843750000000e+04,-3.059632812500000000e+03,-1.779539257812500000e+04,2.351082031250000000e+03,-2.104591406250000000e+04
+-3.689646484375000000e+03,-1.369678320312500000e+04,1.509904296875000000e+04,-2.726867773437500000e+04,-2.984010742187500000e+04,4.824089843750000000e+03,3.217689062500000000e+04
+-2.083152734375000000e+04,1.153889453125000000e+04,1.731058593750000000e+03,2.697290234375000000e+04,-3.047951953125000000e+04,-4.538705078125000000e+03,-2.584429687500000000e+03
+-2.677743554687500000e+04,7.225765136718750000e+03,-2.218787109375000000e+03,6.492222656250000000e+03,1.677949218750000000e+04,2.457570312500000000e+04,-6.094390625000000000e+03
+3.194099414062500000e+04,2.200089843750000000e+03,1.631354687500000000e+04,9.000371093750000000e+03,2.010014453125000000e+04,4.985121093750000000e+03,-1.613177734375000000e+04
+-2.282198046875000000e+04,1.472495312500000000e+04,-3.267349414062500000e+04,2.491843359375000000e+04,-3.134534179687500000e+04,-2.799095507812500000e+04,1.568630078125000000e+04
+-2.491329882812500000e+04,1.948627343750000000e+04,-2.431703515625000000e+04,-2.576504492187500000e+04,7.777164062500000000e+03,1.431704687500000000e+04,1.541877343750000000e+04
+2.300410156250000000e+04,1.023736718750000000e+04,6.281902832031250000e+03,9.815765625000000000e+03,-1.780107226562500000e+04,3.218269531250000000e+03,-4.906558593750000000e+03
+-2.660108398437500000e+04,1.405255078125000000e+04,-1.394549023437500000e+04,-2.196962500000000000e+04,-9.908193359375000000e+03,-2.193678906250000000e+04,-7.393316406250000000e+03
+-2.913683593750000000e+04,5.429050781250000000e+03,-9.220679687500000000e+03,-9.149023437500000000e+01,-6.380371093750000000e+03,-6.302748046875000000e+03,2.861776562500000000e+04

+ 36 - 0
Tests/UnitTest/PregeneratedData/mul_s16_spill/kernel.txt

@@ -0,0 +1,36 @@
+# 1,7,5,7
+4.520781250000000000e+03,1.045247265625000000e+04,-2.895792578125000000e+04,-2.479576171875000000e+04,-2.013201953125000000e+04,8.765398437500000000e+03,-2.741163671875000000e+04
+2.612760156250000000e+04,2.224841210937500000e+04,1.779949218750000000e+03,-2.452207812500000000e+04,-2.381777734375000000e+04,2.174143359375000000e+04,-2.021862890625000000e+04
+-7.959230468750000000e+03,-1.031984960937500000e+04,-2.428418554687500000e+04,2.426617578125000000e+04,1.153326171875000000e+04,-6.712949218750000000e+02,-1.539657812500000000e+04
+2.771436718750000000e+04,1.163246484375000000e+04,4.737636718750000000e+03,2.099911718750000000e+04,3.026303125000000000e+04,3.046071484375000000e+04,-1.933603320312500000e+04
+1.417164062500000000e+03,2.455991943359375000e+03,3.079125000000000000e+03,-1.819370703125000000e+04,-5.625843750000000000e+03,-2.783460546875000000e+04,-5.906933593750000000e+02
+-2.486009765625000000e+04,2.958999218750000000e+04,1.133993750000000000e+04,-2.518941210937500000e+04,2.591175390625000000e+04,2.225841796875000000e+04,2.488918945312500000e+04
+-2.123075390625000000e+04,-6.513814453125000000e+03,3.257266015625000000e+04,-3.759849609375000000e+03,-2.517912304687500000e+04,2.097144531250000000e+04,1.776980078125000000e+04
+-9.060041015625000000e+03,-2.515343750000000000e+03,1.856404296875000000e+04,-1.568828125000000000e+03,-2.125394921875000000e+04,-3.504947265625000000e+03,-4.153726562500000000e+03
+-1.767609765625000000e+04,-2.936958203125000000e+04,2.933737890625000000e+04,2.570963281250000000e+04,-2.223701171875000000e+04,-2.737221484375000000e+04,-2.418431835937500000e+04
+-2.560240625000000000e+04,-8.302146484375000000e+03,-1.905546679687500000e+04,8.151054687500000000e+03,2.983273046875000000e+04,-9.810288085937500000e+03,-7.388472656250000000e+03
+-9.750156250000000000e+03,-2.367832031250000000e+03,-2.992141796875000000e+04,1.551296484375000000e+04,-5.230703125000000000e+02,2.428363281250000000e+03,-4.037289062500000000e+03
+1.884660156250000000e+04,2.193899218750000000e+04,-1.283707812500000000e+04,-1.631669726562500000e+04,-3.309652343750000000e+03,-1.390876953125000000e+03,-1.725058593750000000e+02
+2.941542578125000000e+04,3.274316406250000000e+04,-1.838657031250000000e+04,1.360185546875000000e+04,9.262937500000000000e+03,6.431019531250000000e+03,2.671425000000000000e+04
+-1.659130859375000000e+04,-1.387675000000000000e+04,-7.579369140625000000e+03,1.330485156250000000e+04,2.406644531250000000e+03,-2.567999804687500000e+04,1.903140625000000000e+04
+-1.593857617187500000e+04,2.997205078125000000e+04,3.097894921875000000e+04,1.956669531250000000e+04,2.247124218750000000e+04,1.029136718750000000e+04,8.022019531250000000e+03
+-4.847878417968750000e+03,5.198617187500000000e+03,6.990761230468750000e+03,-1.803659960937500000e+04,-1.773943359375000000e+04,-1.134675585937500000e+04,1.085078125000000000e+02
+2.155166406250000000e+04,-1.906255273437500000e+04,3.173906250000000000e+04,4.280433593750000000e+03,-1.684985156250000000e+04,2.869819531250000000e+04,-1.586458593750000000e+04
+-7.798919921875000000e+03,-1.843185351562500000e+04,-1.389452929687500000e+04,1.061107421875000000e+04,-2.697986914062500000e+04,-2.193582031250000000e+04,-2.159805468750000000e+04
+3.050135546875000000e+04,-3.070978906250000000e+04,1.050001171875000000e+04,-3.245124609375000000e+04,2.825725000000000000e+04,1.122539843750000000e+04,3.268217968750000000e+04
+-1.196513671875000000e+03,1.793134375000000000e+04,-3.444095703125000000e+03,2.521093750000000000e+03,-1.710165234375000000e+04,2.725807812500000000e+04,-6.549291015625000000e+03
+-2.602197070312500000e+04,8.910871093750000000e+03,-2.541019140625000000e+04,1.653552734375000000e+04,-2.622601367187500000e+04,-1.173983593750000000e+04,2.535256640625000000e+04
+1.737542187500000000e+04,-2.121059765625000000e+04,-2.722066992187500000e+04,2.617407031250000000e+04,-2.696504882812500000e+04,-1.586420996093750000e+04,1.920625000000000000e+03
+-2.920108593750000000e+04,-7.852740234375000000e+03,-2.504875000000000000e+04,-2.814639062500000000e+04,-2.319782617187500000e+04,2.572168750000000000e+04,-3.166914062500000000e+02
+1.952148437500000000e+02,4.698039062500000000e+03,7.648703125000000000e+03,2.450208593750000000e+04,5.598351562500000000e+03,-2.873571093750000000e+04,-4.914488281250000000e+03
+-1.388809375000000000e+04,-3.235247460937500000e+04,-2.076108203125000000e+04,-1.462275268554687500e+03,-9.726937500000000000e+03,-7.070931640625000000e+03,1.287016406250000000e+04
+2.695929296875000000e+04,-1.836121875000000000e+04,-6.352722656250000000e+03,2.747713671875000000e+04,-2.946412109375000000e+04,-1.610224609375000000e+04,-2.816005468750000000e+04
+2.892277343750000000e+03,-1.195933398437500000e+04,9.379832031250000000e+03,1.456906250000000000e+03,1.379803125000000000e+04,-1.776399414062500000e+04,1.732593750000000000e+04
+-6.572001953125000000e+03,3.213471093750000000e+04,1.097666015625000000e+04,1.432437500000000000e+03,1.976124609375000000e+04,-2.570202148437500000e+04,8.443238281250000000e+03
+2.476855078125000000e+04,2.059554687500000000e+04,-2.772004687500000000e+04,1.647461718750000000e+04,6.336355468750000000e+03,1.200433593750000000e+03,1.431467968750000000e+04
+-3.330589843750000000e+03,-2.991357421875000000e+04,1.328832031250000000e+04,2.046915625000000000e+04,9.863816406250000000e+03,-2.034300976562500000e+04,-2.365567773437500000e+04
+1.840232812500000000e+04,-1.013185351562500000e+04,2.433244921875000000e+04,1.327124218750000000e+04,-9.591347656250000000e+02,1.992603906250000000e+04,-2.885341210937500000e+04
+1.058421484375000000e+04,-1.928158984375000000e+04,-2.332664453125000000e+04,2.037697656250000000e+04,2.450278125000000000e+04,9.397589843750000000e+03,-3.192091992187500000e+04
+-2.566046875000000000e+03,-3.154221484375000000e+04,2.848189453125000000e+04,1.796555273437500000e+04,-2.045318750000000000e+04,1.461810546875000000e+04,-2.625648242187500000e+04
+2.671117187500000000e+04,-1.670837890625000000e+04,1.058269140625000000e+04,2.185777734375000000e+04,-1.634869531250000000e+04,-1.263386718750000000e+03,1.808980468750000000e+03
+-5.910136718750000000e+02,7.289515625000000000e+03,-6.931769531250000000e+03,2.942483203125000000e+04,1.783266210937500000e+04,-2.869867187500000000e+04,-1.746570312500000000e+04

+ 15 - 0
Tests/UnitTest/TestCases/TestData/add_s16_spill/config_data.h

@@ -0,0 +1,15 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#define ADD_S16_SPILL_DST_SIZE 105
+#define ADD_S16_SPILL_OUT_ACTIVATION_MIN -2000
+#define ADD_S16_SPILL_OUT_ACTIVATION_MAX 32767
+#define ADD_S16_SPILL_INPUT1_OFFSET 0
+#define ADD_S16_SPILL_INPUT2_OFFSET 0
+#define ADD_S16_SPILL_OUTPUT_MULT 1073741824
+#define ADD_S16_SPILL_OUTPUT_SHIFT -14
+#define ADD_S16_SPILL_OUTPUT_OFFSET 0
+#define ADD_S16_SPILL_LEFT_SHIFT 15
+#define ADD_S16_SPILL_INPUT1_SHIFT 0
+#define ADD_S16_SPILL_INPUT2_SHIFT 0
+#define ADD_S16_SPILL_INPUT1_MULT 1073741824
+#define ADD_S16_SPILL_INPUT2_MULT 1073741824

+ 13 - 0
Tests/UnitTest/TestCases/TestData/add_s16_spill/input1_data.h

@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_spill_input1[105] = {
+    24916,  32564,  3404,   -8132,  -13816, 9168,   -25483, 3818,   7630,   -31664, -14491, 32138,  -2191,  -28964,
+    24361,  -6775,  -26217, 10503,  29957,  987,    -939,   9274,   5375,   12734,  12548,  -13026, 17104,  -21144,
+    25612,  -28295, 11581,  -1113,  -14912, -18051, -26314, 10511,  29051,  22290,  -14873, 27631,  -7466,  -10421,
+    16827,  -29066, 28824,  14860,  -11184, -14779, -25359, 9233,   -12774, 31860,  23930,  -3032,  3969,   -412,
+    -16117, -4323,  -327,   1829,   15209,  23472,  13074,  -23844, -13540, 7147,   12193,  13038,  27165,  -16784,
+    -753,   22554,  9677,   -12490, -16161, -16135, 17331,  11565,  16031,  18650,  -6358,  -1287,  -11559, -19727,
+    -29653, -5173,  -24853, 27826,  -32510, 10068,  -678,   -31337, -25126, -26849, -10236, 7634,   13290,  -14687,
+    -23733, 18240,  -31089, 1363,   -10814, 26371,  5802};

+ 13 - 0
Tests/UnitTest/TestCases/TestData/add_s16_spill/input2_data.h

@@ -0,0 +1,13 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_spill_input2[105] = {
+    -5961,  3985,   -2296, -27040, 27696,  18634,  11741,  -11066, -11091, 22723,  -14588, -1066,  19123,  13660,
+    10448,  1409,   9203,  -27831, -12398, 9167,   23949,  -18829, 10106,  -30476, 30078,  -12445, 20211,  22145,
+    21020,  4483,   4643,  -13013, -23124, 5920,   -8723,  -30173, 9830,   32611,  -7662,  18354,  -12293, 12245,
+    24863,  -11302, 32355, 28876,  -10799, -22380, -15413, -15130, 26173,  -13073, -5763,  7922,   2081,   -31418,
+    -14300, 20851,  -2713, -11550, -22921, -642,   -30315, -3744,  17957,  -24845, 27054,  -29664, -29402, -15568,
+    22698,  -11658, 20175, 9846,   6529,   26340,  -11245, -6727,  -4489,  25913,  -6331,  -16670, 23384,  -22663,
+    18610,  -19448, 15628, 9850,   -853,   24871,  -8248,  30669,  1099,   -23842, 6590,   -17218, -14746, -29426,
+    -15603, -6378,  10514, -22240, -42,    29328,  17136};

+ 12 - 0
Tests/UnitTest/TestCases/TestData/add_s16_spill/output_ref_data.h

@@ -0,0 +1,12 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t add_s16_spill_output_ref[105] = {
+    9478,  18275, 554,   -2000, 6940,  13901, -2000, -2000, -1731, -2000, -2000, 15536, 8466,  -2000, 17405,
+    -2000, -2000, -2000, 8780,  5077,  11505, -2000, 7741,  -2000, 21313, -2000, 18658, 501,   23316, -2000,
+    8112,  -2000, -2000, -2000, -2000, -2000, 19441, 27451, -2000, 22993, -2000, 912,   20845, -2000, 30590,
+    21868, -2000, -2000, -2000, -2000, 6700,  9394,  9084,  2445,  3025,  -2000, -2000, 8264,  -1520, -2000,
+    -2000, 11415, -2000, -2000, 2209,  -2000, 19624, -2000, -1119, -2000, 10973, 5448,  14926, -1322, -2000,
+    5103,  3043,  2419,  5771,  22282, -2000, -2000, 5913,  -2000, -2000, -2000, -2000, 18838, -2000, 17470,
+    -2000, -334,  -2000, -2000, -1823, -2000, -728,  -2000, -2000, 5931,  -2000, -2000, -2000, 27850, 11469};

+ 5 - 0
Tests/UnitTest/TestCases/TestData/add_s16_spill/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

+ 10 - 0
Tests/UnitTest/TestCases/TestData/mul_s16_spill/config_data.h

@@ -0,0 +1,10 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#define MUL_S16_SPILL_DST_SIZE 245
+#define MUL_S16_SPILL_OUT_ACTIVATION_MIN -32768
+#define MUL_S16_SPILL_OUT_ACTIVATION_MAX 1000
+#define MUL_S16_SPILL_INPUT1_OFFSET 0
+#define MUL_S16_SPILL_INPUT2_OFFSET 0
+#define MUL_S16_SPILL_OUTPUT_MULT 1073774592
+#define MUL_S16_SPILL_OUTPUT_SHIFT -14
+#define MUL_S16_SPILL_OUTPUT_OFFSET 0

+ 23 - 0
Tests/UnitTest/TestCases/TestData/mul_s16_spill/input1_data.h

@@ -0,0 +1,23 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_spill_input1[245] = {
+    21670,  -28654, 10441,  -30773, 10042,  -24979, 3996,   12865,  -12237, -32611, 25895,  23748,  -2821,  30626,
+    10986,  13498,  20910,  -12872, -23623, 15966,  -1924,  -1903,  25383,  -20575, 8559,   -32159, 24743,  23287,
+    25781,  -28925, -19756, 28724,  -23526, 4541,   -30365, -8557,  17370,  -10414, 22009,  -29547, -13942, -15938,
+    26141,  -5848,  -19413, 16125,  8303,   -29291, 14101,  -31342, 28059,  -23285, 18480,  -22538, 3978,   -18924,
+    21170,  3612,   4240,   -30193, -11577, 30003,  5486,   -32586, -24974, -8056,  -22056, -9557,  -6250,  -13565,
+    -13611, 501,    -16334, 19225,  -996,   -4920,  4357,   -26844, 10956,  -6499,  -25710, 21052,  11103,  -13998,
+    20304,  -12663, 4863,   28098,  -18917, 29925,  11706,  -11588, 16462,  29128,  18638,  -23749, 1553,   -13274,
+    -8304,  8066,   22504,  16849,  23651,  15332,  -8801,  -32431, -25747, -21057, 32229,  6982,   -30445, -3641,
+    -19274, 11051,  -18616, 1461,   -5807,  -25069, 8580,   -13416, 18644,  -28842, -12154, 16971,  4876,   19739,
+    17737,  -21461, -6216,  24785,  15623,  -28209, -28603, 12529,  30793,  23214,  14740,  -23758, -23567, -18223,
+    8055,   -27330, 7573,   -23263, 9815,   -28424, 14711,  9247,   -27048, -8029,  15315,  -19923, 25534,  6214,
+    -20962, -19320, 13515,  21956,  19307,  24546,  1367,   24518,  -15080, -17608, 24651,  -17731, -15567, 27077,
+    16212,  11505,  8137,   1021,   29794,  14418,  -12125, -20520, -18241, 12853,  -3059,  -17795, 2351,   -21045,
+    -3689,  -13696, 15099,  -27268, -29840, 4824,   32176,  -20831, 11538,  1731,   26972,  -30479, -4538,  -2584,
+    -26777, 7225,   -2218,  6492,   16779,  24575,  -6094,  31940,  2200,   16313,  9000,   20100,  4985,   -16131,
+    -22821, 14724,  -32673, 24918,  -31345, -27990, 15686,  -24913, 19486,  -24317, -25765, 7777,   14317,  15418,
+    23004,  10237,  6281,   9815,   -17801, 3218,   -4906,  -26601, 14052,  -13945, -21969, -9908,  -21936, -7393,
+    -29136, 5429,   -9220,  -91,    -6380,  -6302,  28617};

+ 23 - 0
Tests/UnitTest/TestCases/TestData/mul_s16_spill/input2_data.h

@@ -0,0 +1,23 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_spill_input2[245] = {
+    4520,   10452,  -28957, -24795, -20132, 8765,   -27411, 26127,  22248,  1779,   -24522, -23817, 21741,  -20218,
+    -7959,  -10319, -24284, 24266,  11533,  -671,   -15396, 27714,  11632,  4737,   20999,  30263,  30460,  -19336,
+    1417,   2455,   3079,   -18193, -5625,  -27834, -590,   -24860, 29589,  11339,  -25189, 25911,  22258,  24889,
+    -21230, -6513,  32572,  -3759,  -25179, 20971,  17769,  -9060,  -2515,  18564,  -1568,  -21253, -3504,  -4153,
+    -17676, -29369, 29337,  25709,  -22237, -27372, -24184, -25602, -8302,  -19055, 8151,   29832,  -9810,  -7388,
+    -9750,  -2367,  -29921, 15512,  -523,   2428,   -4037,  18846,  21938,  -12837, -16316, -3309,  -1390,  -172,
+    29415,  32743,  -18386, 13601,  9262,   6431,   26714,  -16591, -13876, -7579,  13304,  2406,   -25679, 19031,
+    -15938, 29972,  30978,  19566,  22471,  10291,  8022,   -4847,  5198,   6990,   -18036, -17739, -11346, 108,
+    21551,  -19062, 31739,  4280,   -16849, 28698,  -15864, -7798,  -18431, -13894, 10611,  -26979, -21935, -21598,
+    30501,  -30709, 10500,  -32451, 28257,  11225,  32682,  -1196,  17931,  -3444,  2521,   -17101, 27258,  -6549,
+    -26021, 8910,   -25410, 16535,  -26226, -11739, 25352,  17375,  -21210, -27220, 26174,  -26965, -15864, 1920,
+    -29201, -7852,  -25048, -28146, -23197, 25721,  -316,   195,    4698,   7648,   24502,  5598,   -28735, -4914,
+    -13888, -32352, -20761, -1462,  -9726,  -7070,  12870,  26959,  -18361, -6352,  27477,  -29464, -16102, -28160,
+    2892,   -11959, 9379,   1456,   13798,  -17763, 17325,  -6572,  32134,  10976,  1432,   19761,  -25702, 8443,
+    24768,  20595,  -27720, 16474,  6336,   1200,   14314,  -3330,  -29913, 13288,  20469,  9863,   -20343, -23655,
+    18402,  -10131, 24332,  13271,  -959,   19926,  -28853, 10584,  -19281, -23326, 20376,  24502,  9397,   -31920,
+    -2566,  -31542, 28481,  17965,  -20453, 14618,  -26256, 26711,  -16708, 10582,  21857,  -16348, -1263,  1808,
+    -591,   7289,   -6931,  29424,  17832,  -28698, -17465};

+ 23 - 0
Tests/UnitTest/TestCases/TestData/mul_s16_spill/output_ref_data.h

@@ -0,0 +1,23 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#pragma once
+#include <stdint.h>
+
+const int16_t mul_s16_spill_output_ref[245] = {
+    1000,   -9140,  -9227,  1000,   -6170,  -6682,  -3343,  1000,   -8309,  -1771,  -19379, -17261, -1872,  -18897,
+    -2668,  -4251,  -15497, -9533,  -8315,  -327,   904,    -1610,  1000,   -2974,  1000,   -29701, 1000,   -13742,
+    1000,   -2167,  -1856,  -15948, 1000,   -3857,  547,    1000,   1000,   -3604,  -16919, -23365, -9471,  -12106,
+    -16937, 1000,   -19297, -1850,  -6380,  -18746, 1000,   1000,   -2154,  -13192, -884,   1000,   -425,   1000,
+    -11420, -3237,  1000,   -23689, 1000,   -25063, -4049,  1000,   1000,   1000,   -5487,  -8701,  1000,   1000,
+    1000,   -36,    1000,   1000,   16,     -365,   -537,   -15439, 1000,   1000,   1000,   -2126,  -471,   73,
+    1000,   -12654, -2729,  1000,   -5347,  1000,   1000,   1000,   -6971,  -6737,  1000,   -1744,  -1217,  -7710,
+    1000,   1000,   1000,   1000,   1000,   1000,   -2155,  1000,   -4084,  -4492,  -17740, -3780,  1000,   -12,
+    -12677, -6429,  -18032, 191,    1000,   -21956, -4154,  1000,   -10487, 1000,   -3936,  -13973, -3264,  -13011,
+    1000,   1000,   -1992,  -24546, 1000,   -9664,  -28529, -457,   1000,   -2440,  1000,   1000,   -19605, 1000,
+    -6397,  -7432,  -5873,  -11739, -7856,  1000,   1000,   1000,   1000,   1000,   1000,   1000,   -12362, 364,
+    1000,   1000,   -10331, -18860, -13668, 1000,   -13,    146,    -2162,  -4110,  1000,   -3029,  1000,   -4061,
+    -6871,  -11359, -5156,  -46,    -8844,  -3111,  -4762,  -16883, 1000,   -2492,  -2565,  1000,   -1155,  1000,
+    -326,   1000,   1000,   -1212,  -12565, -2615,  1000,   1000,   1000,   580,    1000,   -18381, 1000,   -666,
+    -20240, 1000,   1000,   1000,   1000,   900,    -2662,  -3246,  -2008,  1000,   1000,   1000,   -3095,  1000,
+    -12816, -4552,  -24262, 1000,   917,    -17021, -13812, -8047,  -11466, 1000,   -16022, 1000,   1000,   -15019,
+    -1801,  -9854,  1000,   1000,   1000,   1000,   1000,   -21685, -7165,  -4503,  -14654, 1000,   846,    -408,
+    526,    1000,   1000,   -82,    -3472,  1000,   -15253};

+ 5 - 0
Tests/UnitTest/TestCases/TestData/mul_s16_spill/test_data.h

@@ -0,0 +1,5 @@
+// Generated by generate_test_data.py using TFL version 2.7.0 as reference.
+#include "config_data.h"
+#include "input1_data.h"
+#include "input2_data.h"
+#include "output_ref_data.h"

+ 1 - 0
Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/Unity/unity_test_arm_elementwise_add_s16.c

@@ -45,3 +45,4 @@ void setUp(void)
 void tearDown(void) {}
 
 void test_add_s16_arm_elementwise_add_s16(void) { add_s16_arm_elementwise_add_s16(); }
+void test_add_s16_spill_arm_elementwise_add_s16(void) { add_s16_spill_arm_elementwise_add_s16(); } // Test entry that forwards to the add_s16_spill case.

+ 46 - 0
Tests/UnitTest/TestCases/test_arm_elementwise_add_s16/test_arm_elementwise_add_s16.c

@@ -20,6 +20,7 @@
 #include "unity.h"
 
 #include "../TestData/add_s16/test_data.h"
+#include "../TestData/add_s16_spill/test_data.h"
 #include "../Utils/validate.h"
 
 void add_s16_arm_elementwise_add_s16(void)
@@ -66,3 +67,48 @@ void add_s16_arm_elementwise_add_s16(void)
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate_s16(output, add_s16_output_ref, ADD_S16_DST_SIZE));
 }
+
+void add_s16_spill_arm_elementwise_add_s16(void) // Runs arm_elementwise_add_s16 on the add_s16_spill dataset; asserts the status and the output.
+{ // NOTE(review): "spill" presumably means an element count that leaves a remainder after vectorised chunks - confirm against the kernel.
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int16_t output[ADD_S16_SPILL_DST_SIZE] = {0}; // Zero-initialised destination buffer handed to the kernel.
+
+    const int16_t *input_data1 = add_s16_spill_input1; // Both operands come from the generated add_s16_spill test data headers.
+    const int16_t *input_data2 = add_s16_spill_input2;
+
+    const int32_t input_1_mult = ADD_S16_SPILL_INPUT1_MULT; // Per-input offset/multiplier/shift values taken from the dataset's config macros.
+    const int32_t input_1_shift = ADD_S16_SPILL_INPUT1_SHIFT;
+    const int32_t input_1_offset = ADD_S16_SPILL_INPUT1_OFFSET;
+    const int32_t input_2_mult = ADD_S16_SPILL_INPUT2_MULT;
+    const int32_t input_2_shift = ADD_S16_SPILL_INPUT2_SHIFT;
+    const int32_t input_2_offset = ADD_S16_SPILL_INPUT2_OFFSET;
+
+    const int32_t left_shift = ADD_S16_SPILL_LEFT_SHIFT;
+
+    const int32_t out_offset = ADD_S16_SPILL_OUTPUT_OFFSET; // Output offset/multiplier/shift values from the same config macros.
+    const int32_t out_mult = ADD_S16_SPILL_OUTPUT_MULT;
+    const int32_t out_shift = ADD_S16_SPILL_OUTPUT_SHIFT;
+
+    const int32_t out_activation_min = ADD_S16_SPILL_OUT_ACTIVATION_MIN; // The generator sets out_activation_min=-2000 for this dataset; the reference output contains many -2000 entries.
+    const int32_t out_activation_max = ADD_S16_SPILL_OUT_ACTIVATION_MAX;
+
+    arm_cmsis_nn_status result = arm_elementwise_add_s16(input_data1,
+                                                         input_data2,
+                                                         input_1_offset, // Note the order here: offset, mult, shift (the locals above are declared mult, shift, offset).
+                                                         input_1_mult,
+                                                         input_1_shift,
+                                                         input_2_offset,
+                                                         input_2_mult,
+                                                         input_2_shift,
+                                                         left_shift,
+                                                         output,
+                                                         out_offset,
+                                                         out_mult,
+                                                         out_shift,
+                                                         out_activation_min,
+                                                         out_activation_max,
+                                                         ADD_S16_SPILL_DST_SIZE); // Last argument is the dataset's DST_SIZE, the same size used for output[] and for validation.
+
+    TEST_ASSERT_EQUAL(expected, result); // The kernel must report ARM_CMSIS_NN_SUCCESS.
+    TEST_ASSERT_TRUE(validate_s16(output, add_s16_spill_output_ref, ADD_S16_SPILL_DST_SIZE)); // Check output against the generated reference via validate_s16.
+}

+ 2 - 0
Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/Unity/unity_test_arm_elementwise_mul_s16.c

@@ -45,3 +45,5 @@ void setUp(void)
 void tearDown(void) {}
 
 void test_mul_s16_arm_elementwise_mul_s16(void) { mul_s16_arm_elementwise_mul_s16(); }
+
+void test_mul_s16_spill_arm_elementwise_mul_s16(void) { mul_s16_spill_arm_elementwise_mul_s16(); } // Test entry that forwards to the mul_s16_spill case.

+ 35 - 0
Tests/UnitTest/TestCases/test_arm_elementwise_mul_s16/test_arm_elementwise_mul_s16.c

@@ -20,6 +20,7 @@
 #include "unity.h"
 
 #include "../TestData/mul_s16/test_data.h"
+#include "../TestData/mul_s16_spill/test_data.h"
 #include "../Utils/validate.h"
 
 void mul_s16_arm_elementwise_mul_s16(void)
@@ -55,3 +56,37 @@ void mul_s16_arm_elementwise_mul_s16(void)
     TEST_ASSERT_EQUAL(expected, result);
     TEST_ASSERT_TRUE(validate_s16(output, mul_s16_output_ref, MUL_S16_DST_SIZE));
 }
+
+void mul_s16_spill_arm_elementwise_mul_s16(void) // Runs arm_elementwise_mul_s16 on the mul_s16_spill dataset; asserts the status and the output.
+{ // NOTE(review): "spill" presumably means an element count that leaves a remainder after vectorised chunks - confirm against the kernel.
+    const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS;
+    int16_t output[MUL_S16_SPILL_DST_SIZE] = {0}; // Zero-initialised destination buffer; MUL_S16_SPILL_DST_SIZE is 245 in config_data.h.
+
+    const int16_t *input_data1 = mul_s16_spill_input1; // Both operands come from the generated mul_s16_spill test data headers.
+    const int16_t *input_data2 = mul_s16_spill_input2;
+
+    const int32_t input_1_offset = MUL_S16_SPILL_INPUT1_OFFSET; // Both input offsets are 0 in this dataset's config_data.h.
+    const int32_t input_2_offset = MUL_S16_SPILL_INPUT2_OFFSET;
+
+    const int32_t out_offset = MUL_S16_SPILL_OUTPUT_OFFSET; // Output offset/multiplier/shift values from the dataset's config macros.
+    const int32_t out_mult = MUL_S16_SPILL_OUTPUT_MULT;
+    const int32_t out_shift = MUL_S16_SPILL_OUTPUT_SHIFT;
+
+    const int32_t out_activation_min = MUL_S16_SPILL_OUT_ACTIVATION_MIN;
+    const int32_t out_activation_max = MUL_S16_SPILL_OUT_ACTIVATION_MAX; // Defined as 1000 for this dataset; the reference output contains many 1000 entries.
+
+    arm_cmsis_nn_status result = arm_elementwise_mul_s16(input_data1,
+                                                         input_data2,
+                                                         input_1_offset,
+                                                         input_2_offset,
+                                                         output,
+                                                         out_offset,
+                                                         out_mult,
+                                                         out_shift,
+                                                         out_activation_min,
+                                                         out_activation_max,
+                                                         MUL_S16_SPILL_DST_SIZE); // Last argument is the dataset's DST_SIZE, the same size used for output[] and for validation.
+
+    TEST_ASSERT_EQUAL(expected, result); // The kernel must report ARM_CMSIS_NN_SUCCESS.
+    TEST_ASSERT_TRUE(validate_s16(output, mul_s16_spill_output_ref, MUL_S16_SPILL_DST_SIZE)); // Check output against the generated reference via validate_s16.
+}

+ 8 - 0
Tests/UnitTest/generate_test_data.py

@@ -1420,6 +1420,10 @@ def load_all_testdatasets():
     ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=4, y_in=4,
                                                 randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
                                                 out_activation_max=INT16_MAX, int16xint8=True)
+    dataset = 'add_s16_spill'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=7, x_in=5, y_in=3,
+                                                randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=-2000,
+                                                out_activation_max=INT16_MAX, int16xint8=True)
 
     type_of_test = 'mul'
     dataset = 'mul'
@@ -1429,6 +1433,10 @@ def load_all_testdatasets():
     ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=8, x_in=5, y_in=4,
                                                 randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
                                                 out_activation_max=INT16_MAX, int16xint8=True)
+    dataset = 'mul_s16_spill'
+    ALL_TESTDATA_SETS[dataset] = AddMulSettings(dataset, type_of_test, args, channels=7, x_in=5, y_in=7,
+                                                randmin=INT16_MIN, randmax=INT16_MAX, out_activation_min=INT16_MIN,
+                                                out_activation_max=1000, int16xint8=True)
 
 
 if __name__ == '__main__':