Przeglądaj źródła

CMSIS-DSP: Improvement to the f16 and f32 Helium code for linear SVM.
Improved tests for weighted sum (f16 relative error tolerance raised to 5.0e-2).

Christophe Favergeon 5 lat temu
rodzic
commit
4b4d5322c8

+ 10 - 6
Source/SVMFunctions/arm_svm_linear_predict_f16.c

@@ -146,10 +146,12 @@ void arm_svm_linear_predict_f16(
         /*
          * Sum the partial parts
          */
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc2);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc3);
+        acc0 = vmulq_n_f16(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc2,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc3,*pDualCoef++);
+
+        sum += vecAddAcrossF16Mve(acc0);
 
         pSrcA += numCols * 4;
         /*
@@ -216,8 +218,10 @@ void arm_svm_linear_predict_f16(
         /*
          * Sum the partial parts
          */
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
+        acc0 = vmulq_n_f16(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
+
+        sum += vecAddAcrossF16Mve(acc0);
 
         pSrcA += numCols * 2;
         row -= 2;

+ 12 - 6
Source/SVMFunctions/arm_svm_linear_predict_f32.c

@@ -142,10 +142,13 @@ void arm_svm_linear_predict_f32(
         /*
          * Sum the partial parts
          */
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc2);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc3);
+
+        acc0 = vmulq_n_f32(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc2,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc3,*pDualCoef++);
+
+        sum += vecAddAcrossF32Mve(acc0);
 
         pSrcA += numCols * 4;
         /*
@@ -212,8 +215,11 @@ void arm_svm_linear_predict_f32(
         /*
          * Sum the partial parts
          */
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
+        acc0 = vmulq_n_f32(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
+
+        sum += vecAddAcrossF32Mve(acc0);
+
 
         pSrcA += numCols * 2;
         row -= 2;

+ 5 - 3
Testing/CMakeLists.txt

@@ -403,16 +403,18 @@ endif()
 disableOptimization(TestingLib)
 disableOptimization(FrameworkLib)
 
-
+## When FLOAT16TESTS is enabled (and the remaining conditions below hold), build only the f16 test sources; otherwise build the regular test sources
+if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
+target_sources(TestingLib PRIVATE ${TESTSRC16})
+else()
 target_sources(TestingLib PRIVATE ${TESTSRC})
-
+endif()
 
 if(NN)
   target_sources(TestingLib PRIVATE ${NNSRC})
 endif()
 
 
-target_sources(TestingLib PRIVATE ${TESTSRC16})
 
 target_sources(TestingLib PRIVATE testmain.cpp)
 target_sources(TestingLib PRIVATE GeneratedSource/TestDesc.cpp)

+ 1 - 1
Testing/Source/Tests/SupportTestsF16.cpp

@@ -8,7 +8,7 @@
 #define REL_ERROR (1.0e-5)
 
 #define ABS_WEIGHTEDSUM_ERROR (5.0e-2)
-#define REL_WEIGHTEDSUM_ERROR (1.0e-2)
+#define REL_WEIGHTEDSUM_ERROR (5.0e-2)
 
 #define ABS_ERROR_F32 (1.0e-3)
 #define REL_ERROR_F32 (1.0e-3)