6 жил өмнө · ee5f859533
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
@@ -124,7 +124,7 @@ arm_status arm_convolve_1x1_s8_fast(const q7_t *input,
             {
                 const mve_pred16_t p = vctp8q(col_loop);
                 const int8x16_t k_0 = vldrbq_z_s8(ker_n_0 + off, p);
-                sum_k += vaddvq_p_s32(k_0, p);
+                sum_k += vaddvq_p_s8(k_0, p);
 
                 const int8x16_t n_0 = vldrbq_z_s8(ip_n_0 + off, p);
                 acc += vmladavq_p_s8(n_0, k_0, p);
--- a/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
+++ b/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c
@@ -100,7 +100,7 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                 const mve_pred16_t p = vctp8q(col_loop);
 
                 const int8x16_t k_0 = vldrbq_z_s8(ker_n_0 + offset, p);
-                sum_row += vaddvq_p_s32(k_0, p);
+                sum_row += vaddvq_p_s8(k_0, p);
 
                 const int8x16_t n_0 = vldrbq_z_s8(ip_n_0 + offset, p);
                 const int8x16_t n_1 = vldrbq_z_s8(ip_n_1 + offset, p);
@@ -132,7 +132,7 @@ q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
             out[i_out_ch + output_ch * 3] = res[3];
         }
 
-        out += ((i_items + 1) * 4 * output_ch);
+        out += (4 * output_ch);
     }
 
     return out;