5 yıl önce · 078c0b282d
--- a/Include/arm_helium_utils.h
+++ b/Include/arm_helium_utils.h
@@ -72,7 +72,7 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
 
				     in = vaddq_f16(tmpVec, in);
			
 
				     tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in);
			
 
				     in = vaddq_f16(tmpVec, in);
			
 
				-    acc = vgetq_lane_f16(in, 0) + vgetq_lane_f16(in, 4);
			
 
				+    acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4);
			
 
				 
			
 
				     return acc;
			
 
				 }
			
--- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
+++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
@@ -67,7 +67,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				     const float16_t *pIn = in;
			
 
				     float16_t       result;
			
 
				     f16x8_t         vsigma;
			
 
				-    float16_t       tmp;
			
 
				+    _Float16       tmp;
			
 
				     f16x8_t         vacc1, vacc2;
			
 
				     uint32_t        index;
			
 
				     float16_t       logclassPriors[S->numberOfClasses];
			
@@ -81,15 +81,15 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				     for (nbClass = 0; nbClass < S->numberOfClasses; nbClass++) {
			
 
				         pIn = in;
			
 
				 
			
 
				-        vacc1 = vdupq_n_f16(0);
			
 
				-        vacc2 = vdupq_n_f16(0);
			
 
				+        vacc1 = vdupq_n_f16(0.0f16);
			
 
				+        vacc2 = vdupq_n_f16(0.0f16);
			
 
				 
			
 
				         uint32_t         blkCnt =S->vectorDimension >> 3;
			
 
				         while (blkCnt > 0U) {
			
 
				             f16x8_t         vinvSigma, vtmp;
			
 
				 
			
 
				             vsigma = vaddq_n_f16(vld1q(pSigma), S->epsilon);
			
 
				-            vacc1 = vaddq(vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f * PI)));
			
 
				+            vacc1 = vaddq(vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI)));
			
 
				 
			
 
				             vinvSigma = vrecip_medprec_f16(vsigma);
			
 
				 
			
@@ -112,7 +112,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				 
			
 
				             vsigma = vaddq_n_f16(vld1q(pSigma), S->epsilon);
			
 
				             vacc1 =
			
 
				-                vaddq_m_f16(vacc1, vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f * PI)), p0);
			
 
				+                vaddq_m_f16(vacc1, vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI)), p0);
			
 
				 
			
 
				             vinvSigma = vrecip_medprec_f16(vsigma);
			
 
				 
			
@@ -126,8 +126,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				             pSigma += blkCnt;
			
 
				         }
			
 
				 
			
 
				-        tmp = -0.5f * vecAddAcrossF16Mve(vacc1);
			
 
				-        tmp -= 0.5f * vecAddAcrossF16Mve(vacc2);
			
 
				+        tmp = -0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc1);
			
 
				+        tmp -= 0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc2);
			
 
				 
			
 
				         *buffer = tmp + *pLogPrior++;
			
 
				         buffer++;
			
@@ -175,13 +175,13 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				         
			
 
				         pIn = in;
			
 
				 
			
 
				-        tmp = 0.0;
			
 
				-        acc1 = 0.0f;
			
 
				-        acc2 = 0.0f;
			
 
				+        tmp = 0.0f16;
			
 
				+        acc1 = 0.0f16;
			
 
				+        acc2 = 0.0f16;
			
 
				         for(nbDim = 0; nbDim < S->vectorDimension; nbDim++)
			
 
				         {
			
 
				            sigma = *pSigma + S->epsilon;
			
 
				-           acc1 += logf(2.0f * PI_F * sigma);
			
 
				+           acc1 += logf(2.0f16 * (_Float16)PI_F * sigma);
			
 
				            acc2 += (*pIn - *pTheta) * (*pIn - *pTheta) / sigma;
			
 
				 
			
 
				            pIn++;
			
@@ -189,8 +189,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
 
				            pSigma++;
			
 
				         }
			
 
				 
			
 
				-        tmp = -0.5f * acc1;
			
 
				-        tmp -= 0.5f * acc2;
			
 
				+        tmp = -0.5f16 * acc1;
			
 
				+        tmp -= 0.5f16 * acc2;
			
 
				 
			
 
				 
			
 
				         *buffer = tmp + logf(*pPrior++);
			
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
@@ -172,7 +172,7 @@ void arm_cmplx_mag_f16(
 
				         uint32_t numSamples)
			
 
				 {
			
 
				   uint32_t blkCnt;                               /* loop counter */
			
 
				-  float16_t real, imag;                      /* Temporary variables to hold input values */
			
 
				+  _Float16 real, imag;                      /* Temporary variables to hold input values */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
			
 
				 
			
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
@@ -107,7 +107,7 @@ void arm_cmplx_mag_squared_f16(
 
				         uint32_t numSamples)
			
 
				 {
			
 
				         uint32_t blkCnt;                               /* Loop counter */
			
 
				-        float16_t real, imag;                          /* Temporary input variables */
			
 
				+        _Float16 real, imag;                          /* Temporary input variables */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
			
 
				 
			
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
@@ -103,7 +103,7 @@ void arm_cmplx_mult_cmplx_f16(
 
				         pDst += 8;
			
 
				     }
			
 
				 
			
 
				-    float16_t a, b, c, d;  /* Temporary variables to store real and imaginary values */
			
 
				+    _Float16 a, b, c, d;  /* Temporary variables to store real and imaginary values */
			
 
				         /* Tail */
			
 
				     blkCnt = (blockSize & 7) >> 1;
			
 
				     while (blkCnt > 0)
			
@@ -134,7 +134,7 @@ void arm_cmplx_mult_cmplx_f16(
 
				         uint32_t numSamples)
			
 
				 {
			
 
				     uint32_t blkCnt;                               /* Loop counter */
			
 
				-    float16_t a, b, c, d;  /* Temporary variables to store real and imaginary values */
			
 
				+    _Float16 a, b, c, d;  /* Temporary variables to store real and imaginary values */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
			
 
				 
			
--- a/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
+++ b/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@@ -72,7 +72,7 @@
 
				 
			
 
				 float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-    float16_t       accumDiff = 0.0f, accumSum = 0.0f;
			
 
				+    _Float16        accumDiff = 0.0f, accumSum = 0.0f;
			
 
				     uint32_t        blkCnt;
			
 
				     f16x8_t         a, b, c, accumDiffV, accumSumV;
			
 
				 
			
@@ -125,14 +125,14 @@ float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, u
 
				 
			
 
				 float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-   float16_t accumDiff=0.0f, accumSum=0.0f, tmpA, tmpB;
			
 
				+   _Float16 accumDiff=0.0f16, accumSum=0.0f16, tmpA, tmpB;
			
 
				 
			
 
				    while(blockSize > 0)
			
 
				    {
			
 
				       tmpA = *pA++;
			
 
				       tmpB = *pB++;
			
 
				-      accumDiff += fabsf(tmpA - tmpB);
			
 
				-      accumSum += fabsf(tmpA + tmpB);
			
 
				+      accumDiff += (_Float16)fabsf(tmpA - tmpB);
			
 
				+      accumSum += (_Float16)fabsf(tmpA + tmpB);
			
 
				       blockSize --;
			
 
				    }
			
 
				    /*
			
--- a/Source/DistanceFunctions/arm_canberra_distance_f16.c
+++ b/Source/DistanceFunctions/arm_canberra_distance_f16.c
@@ -70,7 +70,7 @@
 
				 
			
 
				 float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-    float16_t       accum = 0.0f;
			
 
				+    _Float16       accum = 0.0f16;
			
 
				     uint32_t         blkCnt;
			
 
				     f16x8_t         a, b, c, accumV;
			
 
				 
			
@@ -141,7 +141,7 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uin
 
				 #else
			
 
				 float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-   float16_t accum=0.0f, tmpA, tmpB,diff,sum;
			
 
				+   _Float16 accum=0.0f, tmpA, tmpB,diff,sum;
			
 
				 
			
 
				    while(blockSize > 0)
			
 
				    {
			
@@ -150,7 +150,7 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uin
 
				 
			
 
				       diff = fabsf(tmpA - tmpB);
			
 
				       sum = fabsf(tmpA) + fabsf(tmpB);
			
 
				-      if ((tmpA != 0.0f) || (tmpB != 0.0f))
			
 
				+      if ((tmpA != 0.0f16) || (tmpB != 0.0f16))
			
 
				       {
			
 
				          accum += (diff / sum);
			
 
				       }
			
--- a/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
+++ b/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@@ -67,7 +67,7 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui
 
				     uint32_t        blkCnt;     /* loop counters */
			
 
				     f16x8_t         vecA, vecB;
			
 
				     f16x8_t         vecDiff = vdupq_n_f16(0.0);
			
 
				-    float16_t       maxValue = 0.0;
			
 
				+    float16_t       maxValue = 0.0f16;
			
 
				 
			
 
				 
			
 
				     blkCnt = blockSize >> 3;
			
@@ -111,7 +111,7 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui
 
				 #else
			
 
				 float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-   float16_t diff=0.0f,  maxVal,tmpA, tmpB;
			
 
				+   _Float16 diff=0.0f,  maxVal,tmpA, tmpB;
			
 
				 
			
 
				    tmpA = *pA++;
			
 
				    tmpB = *pB++;
			
--- a/Source/DistanceFunctions/arm_cityblock_distance_f16.c
+++ b/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@@ -102,14 +102,14 @@ float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, ui
 
				 #else
			
 
				 float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-   float16_t accum,tmpA, tmpB;
			
 
				+   _Float16 accum,tmpA, tmpB;
			
 
				 
			
 
				-   accum = 0.0f;
			
 
				+   accum = 0.0f16;
			
 
				    while(blockSize > 0)
			
 
				    {
			
 
				       tmpA = *pA++;
			
 
				       tmpB = *pB++;
			
 
				-      accum  += fabsf(tmpA - tmpB);
			
 
				+      accum  += (_Float16)fabsf(tmpA - tmpB);
			
 
				       
			
 
				       blockSize --;
			
 
				    }
			
--- a/Source/DistanceFunctions/arm_euclidean_distance_f16.c
+++ b/Source/DistanceFunctions/arm_euclidean_distance_f16.c
@@ -105,16 +105,17 @@ float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, ui
 
				 #else
			
 
				 float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-   float16_t accum=0.0f,tmp;
			
 
				+   _Float16 accum=0.0f,tmp;
			
 
				+   float16_t result;
			
 
				 
			
 
				    while(blockSize > 0)
			
 
				    {
			
 
				-      tmp = *pA++ - *pB++;
			
 
				+      tmp = (_Float16)*pA++ - (_Float16)*pB++;
			
 
				       accum += SQ(tmp);
			
 
				       blockSize --;
			
 
				    }
			
 
				-   arm_sqrt_f16(accum,&tmp);
			
 
				-   return(tmp);
			
 
				+   arm_sqrt_f16(accum,&result);
			
 
				+   return(result);
			
 
				 }
			
 
				 
			
 
				 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
			
--- a/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
+++ b/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@@ -145,16 +145,17 @@ float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB
 
				 
			
 
				 float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
			
 
				 {
			
 
				-    float16_t left, right,sum, result, tmp;
			
 
				+    _Float16 left, right,sum, tmp;
			
 
				+    float16_t result;
			
 
				     uint32_t i;
			
 
				 
			
 
				-    left = 0.0f; 
			
 
				-    right = 0.0f;
			
 
				+    left = 0.0f16; 
			
 
				+    right = 0.0f16;
			
 
				     for(i=0; i < blockSize; i++)
			
 
				     {
			
 
				-      tmp = (pA[i] + pB[i]) / 2.0f;
			
 
				-      left  += rel_entr(pA[i], tmp);
			
 
				-      right += rel_entr(pB[i], tmp);
			
 
				+      tmp = ((_Float16)pA[i] + (_Float16)pB[i]) / 2.0f16;
			
 
				+      left  += (_Float16)rel_entr(pA[i], tmp);
			
 
				+      right += (_Float16)rel_entr(pB[i], tmp);
			
 
				     }
			
 
				 
			
 
				 
			
--- a/Source/DistanceFunctions/arm_minkowski_distance_f16.c
+++ b/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@@ -111,13 +111,13 @@ float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, in
 
				 
			
 
				 float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
			
 
				 {
			
 
				-    float16_t sum;
			
 
				+    _Float16 sum;
			
 
				     uint32_t i;
			
 
				 
			
 
				     sum = 0.0f; 
			
 
				     for(i=0; i < blockSize; i++)
			
 
				     {
			
 
				-       sum += powf(fabsf(pA[i] - pB[i]),order);
			
 
				+       sum += (_Float16)powf(fabsf(pA[i] - pB[i]),order);
			
 
				     }
			
 
				 
			
 
				 
			
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c
@@ -64,7 +64,7 @@ void arm_biquad_cascade_df1_f16(
 
				     float16_t Xn1, Xn2, Yn1, Yn2;   /*  Filter pState variables   */
			
 
				     float16_t X0, X1, X2, X3;   /*  temporary input           */
			
 
				     float16_t X4, X5, X6, X7;   /*  temporary input           */
			
 
				-    float16_t lastX, lastY;             /*  X,Y history for tail handling */
			
 
				+    _Float16 lastX, lastY;             /*  X,Y history for tail handling */
			
 
				     f16x8_t coeffs;
			
 
				     f16x8_t accVec;           /* accumulator vector */
			
 
				     uint32_t  sample, stage = S->numStages; /*  loop counters             */
			
@@ -320,10 +320,10 @@ void arm_biquad_cascade_df1_f16(
 
				         float16_t *pOut = pDst;                        /* Destination pointer */
			
 
				         float16_t *pState = S->pState;                 /* pState pointer */
			
 
				   const float16_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
			
 
				-        float16_t acc;                                 /* Accumulator */
			
 
				-        float16_t b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				-        float16_t Xn1, Xn2, Yn1, Yn2;                  /* Filter pState variables */
			
 
				-        float16_t Xn;                                  /* Temporary input */
			
 
				+        _Float16 acc;                                 /* Accumulator */
			
 
				+        _Float16 b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				+        _Float16 Xn1, Xn2, Yn1, Yn2;                  /* Filter pState variables */
			
 
				+        _Float16 Xn;                                  /* Temporary input */
			
 
				         uint32_t sample, stage = S->numStages;         /* Loop counters */
			
 
				 
			
 
				   do
			
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c
@@ -197,10 +197,10 @@ void arm_biquad_cascade_df2T_f16(
 
				         float16_t *pOut = pDst;                        /* Destination pointer */
			
 
				         float16_t *pState = S->pState;                 /* State pointer */
			
 
				   const float16_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
			
 
				-        float16_t acc1;                                /* Accumulator */
			
 
				-        float16_t b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				-        float16_t Xn1;                                 /* Temporary input */
			
 
				-        float16_t d1, d2;                              /* State variables */
			
 
				+        _Float16 acc1;                                /* Accumulator */
			
 
				+        _Float16 b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				+        _Float16 Xn1;                                 /* Temporary input */
			
 
				+        _Float16 d1, d2;                              /* State variables */
			
 
				         uint32_t sample, stage = S->numStages;         /* Loop counters */
			
 
				 
			
 
				   do
			
--- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c
@@ -198,10 +198,10 @@ void arm_biquad_cascade_stereo_df2T_f16(
 
				         float16_t *pOut = pDst;                        /* Destination pointer */
			
 
				         float16_t *pState = S->pState;                 /* State pointer */
			
 
				   const float16_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
			
 
				-        float16_t acc1a, acc1b;                        /* Accumulator */
			
 
				-        float16_t b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				-        float16_t Xn1a, Xn1b;                          /* Temporary input */
			
 
				-        float16_t d1a, d2a, d1b, d2b;                  /* State variables */
			
 
				+        _Float16 acc1a, acc1b;                        /* Accumulator */
			
 
				+        _Float16 b0, b1, b2, a1, a2;                  /* Filter coefficients */
			
 
				+        _Float16 Xn1a, Xn1b;                          /* Temporary input */
			
 
				+        _Float16 d1a, d2a, d1b, d2b;                  /* State variables */
			
 
				         uint32_t sample, stage = S->numStages;         /* Loop counters */
			
 
				 
			
 
				     do
			
--- a/Source/FilteringFunctions/arm_correlate_f16.c
+++ b/Source/FilteringFunctions/arm_correlate_f16.c
@@ -370,8 +370,8 @@ void arm_correlate_f16(
 
				     for (i = 0U; i <= block1 - 2; i += 2)
			
 
				     {
			
 
				         uint32_t  count = i + 1;
			
 
				-        float16_t acc0;
			
 
				-        float16_t acc1;
			
 
				+        _Float16 acc0;
			
 
				+        _Float16 acc1;
			
 
				         /*
			
 
				          * compute 2 accumulators per loop
			
 
				          * size is incrementing for second accumulator
			
@@ -390,7 +390,7 @@ void arm_correlate_f16(
 
				     for (; i < block1; i++)
			
 
				     {
			
 
				         uint32_t  count = i + 1;
			
 
				-        float16_t acc;
			
 
				+        _Float16 acc;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -403,10 +403,10 @@ void arm_correlate_f16(
 
				 
			
 
				     for (i = 0U; i <= block2 - 4; i += 4)
			
 
				     {
			
 
				-        float16_t acc0;
			
 
				-        float16_t acc1;
			
 
				-        float16_t acc2;
			
 
				-        float16_t acc3;
			
 
				+        _Float16 acc0;
			
 
				+        _Float16 acc1;
			
 
				+        _Float16 acc2;
			
 
				+        _Float16 acc3;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -430,8 +430,8 @@ void arm_correlate_f16(
 
				 
			
 
				     for (; i <= block2 - 2; i += 2)
			
 
				     {
			
 
				-        float16_t acc0;
			
 
				-        float16_t acc1;
			
 
				+        _Float16 acc0;
			
 
				+        _Float16 acc1;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -451,7 +451,7 @@ void arm_correlate_f16(
 
				 
			
 
				     if (block2 & 1)
			
 
				     {
			
 
				-        float16_t acc;
			
 
				+        _Float16 acc;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -466,8 +466,8 @@ void arm_correlate_f16(
 
				     {
			
 
				 
			
 
				         uint32_t  count = (i + 1);
			
 
				-        float16_t acc0;
			
 
				-        float16_t acc1;
			
 
				+        _Float16 acc0;
			
 
				+        _Float16 acc1;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -488,7 +488,7 @@ void arm_correlate_f16(
 
				     for (; i >= 0; i--)
			
 
				     {
			
 
				         uint32_t  count = (i + 1);
			
 
				-        float16_t acc;
			
 
				+        _Float16 acc;
			
 
				 
			
 
				         pX = pA;
			
 
				         pY = pB;
			
@@ -517,15 +517,15 @@ void arm_correlate_f16(
 
				   const float16_t *px;                                 /* Intermediate inputA pointer */
			
 
				   const float16_t *py;                                 /* Intermediate inputB pointer */
			
 
				   const float16_t *pSrc1;
			
 
				-        float16_t sum;
			
 
				+        _Float16 sum;
			
 
				         uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
			
 
				         uint32_t j, k, count, blkCnt;                  /* Loop counters */
			
 
				         uint32_t outBlockSize;                         /* Loop counter */
			
 
				         int32_t inc = 1;                               /* Destination address modifier */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) 
			
 
				-    float16_t acc0, acc1, acc2, acc3,c0;                    /* Accumulators */
			
 
				-    float16_t x0, x1, x2, x3;                        /* temporary variables for holding input and coefficient values */
			
 
				+    _Float16 acc0, acc1, acc2, acc3,c0;                    /* Accumulators */
			
 
				+    _Float16 x0, x1, x2, x3;                        /* temporary variables for holding input and coefficient values */
			
 
				 #endif
			
 
				 
			
 
				   /* The algorithm implementation is based on the lengths of the inputs. */
			
@@ -625,7 +625,7 @@ void arm_correlate_f16(
 
				   while (blockSize1 > 0U)
			
 
				   {
			
 
				     /* Accumulator is made zero for every iteration */
			
 
				-    sum = 0.0f;
			
 
				+    sum = 0.0f16;
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) 
			
 
				 
			
@@ -725,10 +725,10 @@ void arm_correlate_f16(
 
				     while (blkCnt > 0U)
			
 
				     {
			
 
				       /* Set all accumulators to zero */
			
 
				-      acc0 = 0.0f;
			
 
				-      acc1 = 0.0f;
			
 
				-      acc2 = 0.0f;
			
 
				-      acc3 = 0.0f;
			
 
				+      acc0 = 0.0f16;
			
 
				+      acc1 = 0.0f16;
			
 
				+      acc2 = 0.0f16;
			
 
				+      acc3 = 0.0f16;
			
 
				 
			
 
				 
			
 
				       /* read x[0], x[1], x[2] samples */
			
@@ -873,7 +873,7 @@ void arm_correlate_f16(
 
				     while (blkCnt > 0U)
			
 
				     {
			
 
				       /* Accumulator is made zero for every iteration */
			
 
				-      sum = 0.0f;
			
 
				+      sum = 0.0f16;
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) 
			
 
				 
			
@@ -939,7 +939,7 @@ void arm_correlate_f16(
 
				     while (blkCnt > 0U)
			
 
				     {
			
 
				       /* Accumulator is made zero for every iteration */
			
 
				-      sum = 0.0f;
			
 
				+      sum = 0.0f16;
			
 
				 
			
 
				       /* Loop over srcBLen */
			
 
				       k = srcBLen;
			
@@ -1000,7 +1000,7 @@ void arm_correlate_f16(
 
				   while (blockSize3 > 0U)
			
 
				   {
			
 
				     /* Accumulator is made zero for every iteration */
			
 
				-    sum = 0.0f;
			
 
				+    sum = 0.0f16;
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) 
			
 
				 
			
@@ -1069,7 +1069,7 @@ void arm_correlate_f16(
 
				 
			
 
				   const float16_t *pIn1 = pSrcA;                       /* inputA pointer */
			
 
				   const float16_t *pIn2 = pSrcB + (srcBLen - 1U);      /* inputB pointer */
			
 
				-        float16_t sum;                                 /* Accumulator */
			
 
				+        _Float16 sum;                                 /* Accumulator */
			
 
				         uint32_t i = 0U, j;                            /* Loop counters */
			
 
				         uint32_t inv = 0U;                             /* Reverse order flag */
			
 
				         uint32_t tot = 0U;                             /* Length */
			
@@ -1127,7 +1127,7 @@ void arm_correlate_f16(
 
				   for (i = 0U; i <= tot; i++)
			
 
				   {
			
 
				     /* Initialize sum with zero to carry out MAC operations */
			
 
				-    sum = 0.0f;
			
 
				+    sum = 0.0f16;
			
 
				 
			
 
				     /* Loop to perform MAC operations according to convolution equation */
			
 
				     for (j = 0U; j <= i; j++)
			
--- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
@@ -711,13 +711,13 @@ arm_status arm_mat_cmplx_mult_f16(
 
				   uint16_t numRowsA = pSrcA->numRows;            /* Number of rows of input matrix A */
			
 
				   uint16_t numColsB = pSrcB->numCols;            /* Number of columns of input matrix B */
			
 
				   uint16_t numColsA = pSrcA->numCols;            /* Number of columns of input matrix A */
			
 
				-  float16_t sumReal, sumImag;                    /* Accumulator */
			
 
				-  float16_t a1, b1, c1, d1;
			
 
				+  _Float16 sumReal, sumImag;                    /* Accumulator */
			
 
				+  _Float16 a1, b1, c1, d1;
			
 
				   uint32_t col, i = 0U, j, row = numRowsA, colCnt; /* loop counters */
			
 
				   arm_status status;                             /* status of matrix multiplication */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL)
			
 
				-  float16_t a0, b0, c0, d0;
			
 
				+  _Float16 a0, b0, c0, d0;
			
 
				 #endif
			
 
				 
			
 
				 #ifdef ARM_MATH_MATRIX_CHECK
			
@@ -755,8 +755,8 @@ arm_status arm_mat_cmplx_mult_f16(
 
				       do
			
 
				       {
			
 
				         /* Set the variable sum, that acts as accumulator, to zero */
			
 
				-        sumReal = 0.0f;
			
 
				-        sumImag = 0.0f;
			
 
				+        sumReal = 0.0f16;
			
 
				+        sumImag = 0.0f16;
			
 
				 
			
 
				         /* Initiate pointer pIn1 to point to starting address of column being processed */
			
 
				         pIn1 = pInA;
			
--- a/Source/MatrixFunctions/arm_mat_inverse_f16.c
+++ b/Source/MatrixFunctions/arm_mat_inverse_f16.c
@@ -66,7 +66,7 @@ arm_status arm_mat_inverse_f16(
 
				     uint32_t  numCols = pSrc->numCols;  /* Number of Cols in the matrix  */
			
 
				     float16_t *pTmpA, *pTmpB;
			
 
				 
			
 
				-    float16_t in = 0.0f;        /* Temporary input values  */
			
 
				+    _Float16 in = 0.0f16;        /* Temporary input values  */
			
 
				     uint32_t  i, rowCnt, flag = 0U, j, loopCnt, k, l;   /* loop counters */
			
 
				     arm_status status;          /* status of matrix inverse */
			
 
				     uint32_t  blkCnt;
			
@@ -137,20 +137,20 @@ arm_status arm_mat_inverse_f16(
 
				             j = numRows - rowCnt;
			
 
				             while (j > 0U)
			
 
				             {
			
 
				-                *pOutT1++ = 0.0f;
			
 
				+                *pOutT1++ = 0.0f16;
			
 
				                 j--;
			
 
				             }
			
 
				             /*
			
 
				              * Writing all ones in the diagonal of the destination matrix
			
 
				              */
			
 
				-            *pOutT1++ = 1.0f;
			
 
				+            *pOutT1++ = 1.0f16;
			
 
				             /*
			
 
				              * Writing all zeroes in upper triangle of the destination matrix
			
 
				              */
			
 
				             j = rowCnt - 1U;
			
 
				             while (j > 0U)
			
 
				             {
			
 
				-                *pOutT1++ = 0.0f;
			
 
				+                *pOutT1++ = 0.0f16;
			
 
				                 j--;
			
 
				             }
			
 
				             /*
			
@@ -199,7 +199,7 @@ arm_status arm_mat_inverse_f16(
 
				             /*
			
 
				              * Check if the pivot element is zero
			
 
				              */
			
 
				-            if (*pInT1 == 0.0f)
			
 
				+            if (*pInT1 == 0.0f16)
			
 
				             {
			
 
				                 /*
			
 
				                  * Loop over the number rows present below
			
@@ -215,7 +215,7 @@ arm_status arm_mat_inverse_f16(
 
				                      * Check if there is a non zero pivot element to
			
 
				                      * replace in the rows below
			
 
				                      */
			
 
				-                    if (*pInT2 != 0.0f)
			
 
				+                    if (*pInT2 != 0.0f16)
			
 
				                     {
			
 
				                         f16x8_t vecA, vecB;
			
 
				                         /*
			
@@ -310,7 +310,7 @@ arm_status arm_mat_inverse_f16(
 
				             /*
			
 
				              * Update the status if the matrix is singular
			
 
				              */
			
 
				-            if ((flag != 1U) && (in == 0.0f))
			
 
				+            if ((flag != 1U) && (in == 0.0f16))
			
 
				             {
			
 
				                 return ARM_MATH_SINGULAR;
			
 
				             }
			
@@ -334,7 +334,7 @@ arm_status arm_mat_inverse_f16(
 
				 
			
 
				             pTmpA = pInT1;
			
 
				 
			
 
				-            f16x8_t invIn = vdupq_n_f16(1.0f / in);
			
 
				+            f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
			
 
				 
			
 
				             blkCnt = (numCols - l) >> 3;
			
 
				             f16x8_t vecA;
			
@@ -537,12 +537,12 @@ arm_status arm_mat_inverse_f16(
 
				          */
			
 
				         status = ARM_MATH_SUCCESS;
			
 
				 
			
 
				-        if ((flag != 1U) && (in == 0.0f))
			
 
				+        if ((flag != 1U) && (in == 0.0f16))
			
 
				         {
			
 
				             pIn = pSrc->pData;
			
 
				             for (i = 0; i < numRows * numCols; i++)
			
 
				             {
			
 
				-                if (pIn[i] != 0.0f)
			
 
				+                if (pIn[i] != 0.0f16)
			
 
				                     break;
			
 
				             }
			
 
				 
			
@@ -568,7 +568,7 @@ arm_status arm_mat_inverse_f16(
 
				   uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
			
 
				   uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
			
 
				 
			
 
				-  float16_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
			
 
				+  _Float16 Xchg, in = 0.0f16, in1;                /* Temporary input values  */
			
 
				   uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l;      /* loop counters */
			
 
				   arm_status status;                             /* status of matrix inverse */
			
 
				 
			
@@ -636,18 +636,18 @@ arm_status arm_mat_inverse_f16(
 
				       j = numRows - rowCnt;
			
 
				       while (j > 0U)
			
 
				       {
			
 
				-        *pOutT1++ = 0.0f;
			
 
				+        *pOutT1++ = 0.0f16;
			
 
				         j--;
			
 
				       }
			
 
				 
			
 
				       /* Writing all ones in the diagonal of the destination matrix */
			
 
				-      *pOutT1++ = 1.0f;
			
 
				+      *pOutT1++ = 1.0f16;
			
 
				 
			
 
				       /* Writing all zeroes in upper triangle of the destination matrix */
			
 
				       j = rowCnt - 1U;
			
 
				       while (j > 0U)
			
 
				       {
			
 
				-        *pOutT1++ = 0.0f;
			
 
				+        *pOutT1++ = 0.0f16;
			
 
				         j--;
			
 
				       }
			
 
				 
			
@@ -685,7 +685,7 @@ arm_status arm_mat_inverse_f16(
 
				       k = 1U;
			
 
				 
			
 
				       /* Check if the pivot element is zero */
			
 
				-      if (*pInT1 == 0.0f)
			
 
				+      if (*pInT1 == 0.0f16)
			
 
				       {
			
 
				         /* Loop over the number rows present below */
			
 
				 
			
@@ -697,7 +697,7 @@ arm_status arm_mat_inverse_f16(
 
				 
			
 
				           /* Check if there is a non zero pivot element to
			
 
				            * replace in the rows below */
			
 
				-          if (*pInT2 != 0.0f)
			
 
				+          if (*pInT2 != 0.0f16)
			
 
				           {
			
 
				             /* Loop over number of columns
			
 
				              * to the right of the pivot element */
			
@@ -743,7 +743,7 @@ arm_status arm_mat_inverse_f16(
 
				       }
			
 
				 
			
 
				       /* Update the status if the matrix is singular */
			
 
				-      if ((flag != 1U) && (in == 0.0f))
			
 
				+      if ((flag != 1U) && (in == 0.0f16))
			
 
				       {
			
 
				         return ARM_MATH_SINGULAR;
			
 
				       }
			
@@ -877,12 +877,12 @@ arm_status arm_mat_inverse_f16(
 
				     /* Set status as ARM_MATH_SUCCESS */
			
 
				     status = ARM_MATH_SUCCESS;
			
 
				 
			
 
				-    if ((flag != 1U) && (in == 0.0f))
			
 
				+    if ((flag != 1U) && (in == 0.0f16))
			
 
				     {
			
 
				       pIn = pSrc->pData;
			
 
				       for (i = 0; i < numRows * numCols; i++)
			
 
				       {
			
 
				-        if (pIn[i] != 0.0f)
			
 
				+        if (pIn[i] != 0.0f16)
			
 
				             break;
			
 
				       }
			
 
				 
			
--- a/Source/MatrixFunctions/arm_mat_mult_f16.c
+++ b/Source/MatrixFunctions/arm_mat_mult_f16.c
@@ -632,7 +632,7 @@ arm_status arm_mat_mult_f16(
 
				   float16_t *pInB = pSrcB->pData;                /* Input data matrix pointer B */
			
 
				   float16_t *pOut = pDst->pData;                 /* Output data matrix pointer */
			
 
				   float16_t *px;                                 /* Temporary output data matrix pointer */
			
 
				-  float16_t sum;                                 /* Accumulator */
			
 
				+  _Float16 sum;                                 /* Accumulator */
			
 
				   uint16_t numRowsA = pSrcA->numRows;            /* Number of rows of input matrix A */
			
 
				   uint16_t numColsB = pSrcB->numCols;            /* Number of columns of input matrix B */
			
 
				   uint16_t numColsA = pSrcA->numCols;            /* Number of columns of input matrix A */
			
@@ -671,7 +671,7 @@ arm_status arm_mat_mult_f16(
 
				       do
			
 
				       {
			
 
				         /* Set the variable sum, that acts as accumulator, to zero */
			
 
				-        sum = 0.0f;
			
 
				+        sum = 0.0f16;
			
 
				 
			
 
				         /* Initialize pointer pIn1 to point to starting address of column being processed */
			
 
				         pIn1 = pInA;
			
--- a/Source/SVMFunctions/arm_svm_linear_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_linear_predict_f16.c
@@ -151,7 +151,7 @@ void arm_svm_linear_predict_f16(
 
				         acc0 = vfmaq_n_f16(acc0,acc2,*pDualCoef++);
			
 
				         acc0 = vfmaq_n_f16(acc0,acc3,*pDualCoef++);
			
 
				 
			
 
				-        sum += vecAddAcrossF16Mve(acc0);
			
 
				+        sum += (_Float16)vecAddAcrossF16Mve(acc0);
			
 
				 
			
 
				         pSrcA += numCols * 4;
			
 
				         /*
			
@@ -221,7 +221,7 @@ void arm_svm_linear_predict_f16(
 
				         acc0 = vmulq_n_f16(acc0,*pDualCoef++);
			
 
				         acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
			
 
				 
			
 
				-        sum += vecAddAcrossF16Mve(acc0);
			
 
				+        sum += (_Float16)vecAddAcrossF16Mve(acc0);
			
 
				 
			
 
				         pSrcA += numCols * 2;
			
 
				         row -= 2;
			
@@ -274,7 +274,7 @@ void arm_svm_linear_predict_f16(
 
				         /*
			
 
				          * Sum the partial parts
			
 
				          */
			
 
				-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
			
 
				+        sum += (_Float16)*pDualCoef++ * (_Float16)vecAddAcrossF16Mve(acc0);
			
 
				 
			
 
				     }
			
 
				 
			
--- a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
@@ -294,7 +294,7 @@ void arm_svm_polynomial_predict_f16(
 
				                              (vaddq_n_f16(vmulq_n_f16(vtmp, S->gamma), S->coef0), S->degree), 
			
 
				                              vctp16q(1));
			
 
				     }
			
 
				-    sum += vecAddAcrossF16Mve(vSum);
			
 
				+    sum += (_Float16)vecAddAcrossF16Mve(vSum);
			
 
				 
			
 
				     
			
 
				     *pResult = S->classes[STEP(sum)];
			
@@ -316,9 +316,9 @@ void arm_svm_polynomial_predict_f16(
 
				         dot=0;
			
 
				         for(j=0; j < S->vectorDimension; j++)
			
 
				         {
			
 
				-            dot = dot + in[j]* *pSupport++;
			
 
				+            dot = dot + (_Float16)in[j]* (_Float16)*pSupport++;
			
 
				         }
			
 
				-        sum += S->dualCoefficients[i] * arm_exponent_f16(S->gamma * dot + S->coef0, S->degree);
			
 
				+        sum += S->dualCoefficients[i] * (_Float16)arm_exponent_f16(S->gamma * dot + S->coef0, S->degree);
			
 
				     }
			
 
				 
			
 
				     *pResult=S->classes[STEP(sum)];
			
--- a/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_rbf_predict_f16.c
@@ -67,7 +67,7 @@ void arm_svm_rbf_predict_f16(
 
				     uint32_t         row;
			
 
				     uint32_t         blkCnt;     /* loop counters */
			
 
				     const float16_t *pDualCoef = S->dualCoefficients;
			
 
				-    float16_t       sum = S->intercept;
			
 
				+    _Float16       sum = S->intercept;
			
 
				     f16x8_t         vSum = vdupq_n_f16(0);
			
 
				 
			
 
				     row = numRows;
			
@@ -323,19 +323,19 @@ void arm_svm_rbf_predict_f16(
 
				     int32_t * pResult)
			
 
				 {
			
 
				     _Float16 sum=S->intercept;
			
 
				-    _Float16 dot=0;
			
 
				+    _Float16 dot=00.f16;
			
 
				     uint32_t i,j;
			
 
				     const float16_t *pSupport = S->supportVectors;
			
 
				 
			
 
				     for(i=0; i < S->nbOfSupportVectors; i++)
			
 
				     {
			
 
				-        dot=0;
			
 
				+        dot=0.0f16;
			
 
				         for(j=0; j < S->vectorDimension; j++)
			
 
				         {
			
 
				-            dot = dot + SQ(in[j] - *pSupport);
			
 
				+            dot = dot + SQ((_Float16)in[j] - (_Float16) *pSupport);
			
 
				             pSupport++;
			
 
				         }
			
 
				-        sum += S->dualCoefficients[i] * expf(-S->gamma * dot);
			
 
				+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf(-(_Float16)S->gamma * dot);
			
 
				     }
			
 
				     *pResult=S->classes[STEP(sum)];
			
 
				 }
			
--- a/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c
@@ -67,7 +67,7 @@ void arm_svm_sigmoid_predict_f16(
 
				     uint32_t         row;
			
 
				     uint32_t         blkCnt;     /* loop counters */
			
 
				     const float16_t *pDualCoef = S->dualCoefficients;
			
 
				-    float16_t       sum = S->intercept;
			
 
				+    _Float16       sum = S->intercept;
			
 
				     f16x8_t         vSum = vdupq_n_f16(0.0f);
			
 
				 
			
 
				     row = numRows;
			
@@ -305,18 +305,18 @@ void arm_svm_sigmoid_predict_f16(
 
				     int32_t * pResult)
			
 
				 {
			
 
				     _Float16 sum=S->intercept;
			
 
				-    _Float16 dot=0;
			
 
				+    _Float16 dot=0.0f16;
			
 
				     uint32_t i,j;
			
 
				     const float16_t *pSupport = S->supportVectors;
			
 
				 
			
 
				     for(i=0; i < S->nbOfSupportVectors; i++)
			
 
				     {
			
 
				-        dot=0;
			
 
				+        dot=0.0f16;
			
 
				         for(j=0; j < S->vectorDimension; j++)
			
 
				         {
			
 
				-            dot = dot + in[j]* *pSupport++;
			
 
				+            dot = dot + (_Float16)in[j] * (_Float16)*pSupport++;
			
 
				         }
			
 
				-        sum += S->dualCoefficients[i] * tanhf(S->gamma * dot + S->coef0);
			
 
				+        sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((_Float16)S->gamma * dot + (_Float16)S->coef0);
			
 
				     }
			
 
				     *pResult=S->classes[STEP(sum)];
			
 
				 }
			
--- a/Source/StatisticsFunctions/arm_entropy_f16.c
+++ b/Source/StatisticsFunctions/arm_entropy_f16.c
@@ -65,7 +65,7 @@
 
				 float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
			
 
				 {
			
 
				     uint32_t        blkCnt;
			
 
				-    float16_t       accum=0.0f,p;
			
 
				+    _Float16       accum=0.0f16,p;
			
 
				 
			
 
				 
			
 
				     blkCnt = blockSize;
			
@@ -110,7 +110,7 @@ float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
 
				 {
			
 
				     const float16_t *pIn;
			
 
				     uint32_t blkCnt;
			
 
				-    float16_t accum, p;
			
 
				+    _Float16 accum, p;
			
 
				  
			
 
				     pIn = pSrcA;
			
 
				     blkCnt = blockSize;
			
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
@@ -72,12 +72,12 @@
 
				 float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
			
 
				 {
			
 
				     uint32_t blkCnt;
			
 
				-    float16_t accum, pA,pB;
			
 
				+    _Float16 accum, pA,pB;
			
 
				  
			
 
				     
			
 
				     blkCnt = blockSize;
			
 
				 
			
 
				-    accum = 0.0f;
			
 
				+    accum = 0.0f16;
			
 
				 
			
 
				     f16x8_t         vSum = vdupq_n_f16(0.0f);
			
 
				     blkCnt = blockSize >> 3;
			
@@ -120,7 +120,7 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSr
 
				 {
			
 
				     const float16_t *pInA, *pInB;
			
 
				     uint32_t blkCnt;
			
 
				-    float16_t accum, pA,pB;
			
 
				+    _Float16 accum, pA,pB;
			
 
				  
			
 
				     pInA = pSrcA;
			
 
				     pInB = pSrcB;
			
--- a/Source/StatisticsFunctions/arm_logsumexp_f16.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_f16.c
@@ -72,8 +72,8 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
 
				     float16_t       maxVal;
			
 
				     const float16_t *pIn;
			
 
				     int32_t         blkCnt;
			
 
				-    float16_t       accum=0.0f16;
			
 
				-    float16_t       tmp;
			
 
				+    _Float16       accum=0.0f16;
			
 
				+    _Float16       tmp;
			
 
				 
			
 
				 
			
 
				     arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);
			
@@ -122,11 +122,11 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
 
				 #else
			
 
				 float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
			
 
				 {
			
 
				-    float16_t maxVal;
			
 
				-    float16_t tmp;
			
 
				+    _Float16 maxVal;
			
 
				+    _Float16 tmp;
			
 
				     const float16_t *pIn;
			
 
				     uint32_t blkCnt;
			
 
				-    float16_t accum;
			
 
				+    _Float16 accum;
			
 
				  
			
 
				     pIn = in;
			
 
				     blkCnt = blockSize;
			
--- a/Source/StatisticsFunctions/arm_power_f16.c
+++ b/Source/StatisticsFunctions/arm_power_f16.c
@@ -88,8 +88,8 @@ void arm_power_f16(
 
				         float16_t * pResult)
			
 
				 {
			
 
				         uint32_t blkCnt;                               /* Loop counter */
			
 
				-        float16_t sum = 0.0f;                          /* Temporary result storage */
			
 
				-        float16_t in;                                  /* Temporary variable to store input value */
			
 
				+        _Float16 sum = 0.0f16;                          /* Temporary result storage */
			
 
				+        _Float16 in;                                  /* Temporary variable to store input value */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
			
 
				 
			
--- a/Source/StatisticsFunctions/arm_rms_f16.c
+++ b/Source/StatisticsFunctions/arm_rms_f16.c
@@ -83,8 +83,8 @@ void arm_rms_f16(
 
				         float16_t * pResult)
			
 
				 {
			
 
				         uint32_t blkCnt;                               /* Loop counter */
			
 
				-        float16_t sum = 0.0f;                          /* Temporary result storage */
			
 
				-        float16_t in;                                  /* Temporary variable to store input value */
			
 
				+        _Float16 sum = 0.0f16;                          /* Temporary result storage */
			
 
				+        _Float16 in;                                  /* Temporary variable to store input value */
			
 
				 
			
 
				 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
			
 
				 
			
--- a/Testing/FrameworkSource/Error.cpp
+++ b/Testing/FrameworkSource/Error.cpp
@@ -342,7 +342,7 @@ void assert_close_error(unsigned long nb,float64_t &ref, float64_t &val, double
 
				     if (abs(val - ref) > (absthreshold + relthreshold * abs(ref)))
			
 
				     {
			
 
				         char details[200];
			
 
				-        sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold);
			
 
				+        sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref);
			
 
				         throw (Error(CLOSE_ERROR,nb,details));
			
 
				     }
			
 
				 };
			
@@ -385,7 +385,7 @@ void assert_close_error(unsigned long nb,float32_t &ref, float32_t &val, double
 
				     if (abs(val - ref) > (absthreshold + relthreshold * abs(ref)))
			
 
				     {
			
 
				         char details[200];
			
 
				-        sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold);
			
 
				+        sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref);
			
 
				         throw (Error(CLOSE_ERROR,nb,details));
			
 
				     }
			
 
				 };
			
@@ -429,7 +429,7 @@ void assert_close_error(unsigned long nb,float16_t &ref, float16_t &val, double
 
				     if (abs((float)val - (float)ref) > (absthreshold + relthreshold * abs((float)ref)))
			
 
				     {
			
 
				         char details[200];
			
 
				-        sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold);
			
 
				+        sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref);
			
 
				         throw (Error(CLOSE_ERROR,nb,details));
			
 
				     }
			
 
				 };
			
--- a/Testing/PatternGeneration/Support.py
+++ b/Testing/PatternGeneration/Support.py
@@ -11,7 +11,7 @@ NBVECTORS = [4,10,16]
 
				 
			
 
				 VECDIM = [12,14,20]
			
 
				 
			
 
				-def genWsum(config,nb):
			
 
				+def genWsum(config,f,nb):
			
 
				     DIM=50
			
 
				     inputs=[] 
			
 
				     weights=[]
			
@@ -23,15 +23,15 @@ def genWsum(config,nb):
 
				     inputs += list(va)
			
 
				     weights += list(vb)
			
 
				 
			
 
				-    nbiters = Tools.loopnb(0,Tools.TAILONLY)
			
 
				+    nbiters = Tools.loopnb(f,Tools.TAILONLY)
			
 
				     e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) 
			
 
				     output.append(e)
			
 
				 
			
 
				-    nbiters = Tools.loopnb(0,Tools.BODYONLY)
			
 
				+    nbiters = Tools.loopnb(f,Tools.BODYONLY)
			
 
				     e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) 
			
 
				     output.append(e)
			
 
				 
			
 
				-    nbiters = Tools.loopnb(0,Tools.BODYANDTAIL)
			
 
				+    nbiters = Tools.loopnb(f,Tools.BODYANDTAIL)
			
 
				     e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) 
			
 
				     output.append(e)
			
 
				 
			
@@ -92,7 +92,7 @@ def writeTestsF32(config):
 
				 
			
 
				 
			
 
				     # This is for benchmarking the weighted sum and we use only one test pattern
			
 
				-    genWsum(config,6)
			
 
				+    genWsum(config,Tools.F32,6)
			
 
				     
			
 
				 
			
 
				 def writeTestsF16(config):
			
@@ -105,7 +105,7 @@ def writeTestsF16(config):
 
				     config.writeInput(11,va,"Samples")
			
 
				 
			
 
				     # This is for benchmarking the weighted sum and we use only one test pattern
			
 
				-    genWsum(config,6)
			
 
				+    genWsum(config,Tools.F16,6)
			
 
				 
			
 
				 def writeTestsQ31(config):
			
 
				     NBSAMPLES=256
			
--- a/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt
+++ b/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt
@@ -1,102 +1,102 @@
 
				 H
			
 
				 50
			
 
				-// 0.549118
			
 
				-0x3865
			
 
				-// 0.782574
			
 
				-0x3a43
			
 
				-// 0.537389
			
 
				-0x384d
			
 
				-// 0.576597
			
 
				-0x389d
			
 
				-// 0.849496
			
 
				-0x3acc
			
 
				-// 0.764850
			
 
				-0x3a1e
			
 
				-// 0.539151
			
 
				-0x3850
			
 
				-// 0.459163
			
 
				-0x3759
			
 
				-// 0.352476
			
 
				-0x35a4
			
 
				-// 0.333343
			
 
				-0x3555
			
 
				-// 0.362662
			
 
				-0x35cd
			
 
				-// 0.343928
			
 
				-0x3581
			
 
				-// 0.046084
			
 
				-0x29e6
			
 
				-// 0.583088
			
 
				-0x38aa
			
 
				-// 0.464789
			
 
				-0x3770
			
 
				-// 0.659567
			
 
				-0x3947
			
 
				-// 0.177360
			
 
				-0x31ad
			
 
				-// 0.465121
			
 
				-0x3771
			
 
				-// 0.849010
			
 
				-0x3acb
			
 
				-// 0.748383
			
 
				-0x39fd
			
 
				-// 0.493254
			
 
				-0x37e4
			
 
				-// 0.023579
			
 
				-0x2609
			
 
				-// 0.354457
			
 
				-0x35ac
			
 
				-// 0.880690
			
 
				-0x3b0c
			
 
				-// 0.029813
			
 
				-0x27a2
			
 
				-// 0.338038
			
 
				-0x3569
			
 
				-// 0.724386
			
 
				-0x39cc
			
 
				-// 0.341454
			
 
				-0x3577
			
 
				-// 0.676073
			
 
				-0x3969
			
 
				-// 0.041176
			
 
				-0x2945
			
 
				-// 0.214019
			
 
				-0x32d9
			
 
				-// 0.322539
			
 
				-0x3529
			
 
				-// 0.499026
			
 
				-0x37fc
			
 
				-// 0.616038
			
 
				-0x38ee
			
 
				-// 0.688882
			
 
				-0x3983
			
 
				-// 0.066439
			
 
				-0x2c41
			
 
				-// 0.094981
			
 
				-0x2e14
			
 
				-// 0.873092
			
 
				-0x3afc
			
 
				-// 0.642707
			
 
				-0x3924
			
 
				-// 0.537527
			
 
				-0x384d
			
 
				-// 0.370953
			
 
				-0x35ef
			
 
				-// 0.450042
			
 
				-0x3733
			
 
				-// 0.434679
			
 
				-0x36f4
			
 
				-// 0.676789
			
 
				-0x396a
			
 
				-// 0.303649
			
 
				-0x34dc
			
 
				-// 0.076930
			
 
				-0x2cec
			
 
				-// 0.871305
			
 
				-0x3af8
			
 
				-// 0.855053
			
 
				-0x3ad7
			
 
				-// 0.498935
			
 
				-0x37fc
			
 
				-// 0.854980
			
 
				-0x3ad7
			
 
				+// 0.145259
			
 
				+0x30a6
			
 
				+// 0.431775
			
 
				+0x36e9
			
 
				+// 0.925328
			
 
				+0x3b67
			
 
				+// 0.170239
			
 
				+0x3173
			
 
				+// 0.385773
			
 
				+0x362c
			
 
				+// 0.998601
			
 
				+0x3bfd
			
 
				+// 0.108755
			
 
				+0x2ef6
			
 
				+// 0.479921
			
 
				+0x37ae
			
 
				+// 0.592423
			
 
				+0x38bd
			
 
				+// 0.431234
			
 
				+0x36e6
			
 
				+// 0.413843
			
 
				+0x369f
			
 
				+// 0.959014
			
 
				+0x3bac
			
 
				+// 0.665528
			
 
				+0x3953
			
 
				+// 0.952834
			
 
				+0x3b9f
			
 
				+// 0.562821
			
 
				+0x3881
			
 
				+// 0.609152
			
 
				+0x38e0
			
 
				+// 0.890135
			
 
				+0x3b1f
			
 
				+// 0.002624
			
 
				+0x1960
			
 
				+// 0.666853
			
 
				+0x3956
			
 
				+// 0.813988
			
 
				+0x3a83
			
 
				+// 0.170387
			
 
				+0x3174
			
 
				+// 0.264551
			
 
				+0x343c
			
 
				+// 0.699111
			
 
				+0x3998
			
 
				+// 0.738154
			
 
				+0x39e8
			
 
				+// 0.475053
			
 
				+0x379a
			
 
				+// 0.439282
			
 
				+0x3707
			
 
				+// 0.699736
			
 
				+0x3999
			
 
				+// 0.488956
			
 
				+0x37d3
			
 
				+// 0.115565
			
 
				+0x2f65
			
 
				+// 0.115452
			
 
				+0x2f64
			
 
				+// 0.586420
			
 
				+0x38b1
			
 
				+// 0.327648
			
 
				+0x353e
			
 
				+// 0.442207
			
 
				+0x3713
			
 
				+// 0.752186
			
 
				+0x3a04
			
 
				+// 0.476363
			
 
				+0x379f
			
 
				+// 0.491094
			
 
				+0x37dc
			
 
				+// 0.552734
			
 
				+0x386c
			
 
				+// 0.674894
			
 
				+0x3966
			
 
				+// 0.705545
			
 
				+0x39a5
			
 
				+// 0.966659
			
 
				+0x3bbc
			
 
				+// 0.528146
			
 
				+0x383a
			
 
				+// 0.633660
			
 
				+0x3912
			
 
				+// 0.049129
			
 
				+0x2a4a
			
 
				+// 0.458540
			
 
				+0x3756
			
 
				+// 0.331162
			
 
				+0x354c
			
 
				+// 0.399161
			
 
				+0x3663
			
 
				+// 0.015650
			
 
				+0x2402
			
 
				+// 0.271388
			
 
				+0x3458
			
 
				+// 0.912530
			
 
				+0x3b4d
			
 
				+// 0.655578
			
 
				+0x393f
			
--- a/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt
+++ b/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt
@@ -1,8 +1,8 @@
 
				 H
			
 
				 3
			
 
				-// 0.544937
			
 
				-0x385c
			
 
				-// 0.581009
			
 
				-0x38a6
			
 
				-// 0.504352
			
 
				-0x3809
			
 
				+// 0.445338
			
 
				+0x3720
			
 
				+// 0.569713
			
 
				+0x388f
			
 
				+// 0.564358
			
 
				+0x3884
			
--- a/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt
+++ b/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt
@@ -1,102 +1,102 @@
 
				 H
			
 
				 50
			
 
				-// 0.873606
			
 
				-0x3afd
			
 
				-// 0.005892
			
 
				-0x1e09
			
 
				-// 0.669526
			
 
				-0x395b
			
 
				-// 0.198146
			
 
				-0x3257
			
 
				-// 0.032904
			
 
				-0x2836
			
 
				-// 0.829321
			
 
				-0x3aa2
			
 
				-// 0.044536
			
 
				-0x29b3
			
 
				-// 0.842708
			
 
				-0x3abe
			
 
				-// 0.981319
			
 
				-0x3bda
			
 
				-// 0.274000
			
 
				-0x3462
			
 
				-// 0.509194
			
 
				-0x3813
			
 
				-// 0.994965
			
 
				-0x3bf6
			
 
				-// 0.572766
			
 
				-0x3895
			
 
				-// 0.557596
			
 
				-0x3876
			
 
				-// 0.306704
			
 
				-0x34e8
			
 
				-// 0.983822
			
 
				-0x3bdf
			
 
				-// 0.145555
			
 
				-0x30a8
			
 
				-// 0.539088
			
 
				-0x3850
			
 
				-// 0.527526
			
 
				-0x3838
			
 
				-// 0.501409
			
 
				-0x3803
			
 
				-// 0.249480
			
 
				-0x33fc
			
 
				-// 0.273503
			
 
				-0x3460
			
 
				-// 0.567660
			
 
				-0x388b
			
 
				-// 0.741561
			
 
				-0x39ef
			
 
				-// 0.458026
			
 
				-0x3754
			
 
				-// 0.239162
			
 
				-0x33a7
			
 
				-// 0.731739
			
 
				-0x39db
			
 
				-// 0.662458
			
 
				-0x394d
			
 
				-// 0.024779
			
 
				-0x2658
			
 
				-// 0.086811
			
 
				-0x2d8e
			
 
				-// 0.660991
			
 
				-0x394a
			
 
				-// 0.834424
			
 
				-0x3aad
			
 
				-// 0.163672
			
 
				-0x313d
			
 
				-// 0.120432
			
 
				-0x2fb5
			
 
				-// 0.593488
			
 
				-0x38bf
			
 
				-// 0.973784
			
 
				-0x3bca
			
 
				-// 0.167473
			
 
				-0x315c
			
 
				-// 0.858171
			
 
				-0x3ade
			
 
				-// 0.986637
			
 
				-0x3be5
			
 
				-// 0.223556
			
 
				-0x3327
			
 
				-// 0.382377
			
 
				-0x361e
			
 
				-// 0.757667
			
 
				-0x3a10
			
 
				-// 0.032219
			
 
				-0x2820
			
 
				-// 0.574024
			
 
				-0x3898
			
 
				-// 0.125286
			
 
				-0x3002
			
 
				-// 0.946997
			
 
				-0x3b93
			
 
				-// 0.942443
			
 
				-0x3b8a
			
 
				-// 0.152563
			
 
				-0x30e2
			
 
				-// 0.240567
			
 
				-0x33b3
			
 
				-// 0.160261
			
 
				-0x3121
			
 
				+// 0.305271
			
 
				+0x34e2
			
 
				+// 0.032698
			
 
				+0x282f
			
 
				+// 0.266409
			
 
				+0x3443
			
 
				+// 0.617540
			
 
				+0x38f1
			
 
				+// 0.152922
			
 
				+0x30e5
			
 
				+// 0.774995
			
 
				+0x3a33
			
 
				+// 0.848550
			
 
				+0x3aca
			
 
				+// 0.523429
			
 
				+0x3830
			
 
				+// 0.116035
			
 
				+0x2f6d
			
 
				+// 0.649223
			
 
				+0x3932
			
 
				+// 0.246758
			
 
				+0x33e5
			
 
				+// 0.826661
			
 
				+0x3a9d
			
 
				+// 0.967839
			
 
				+0x3bbe
			
 
				+// 0.283239
			
 
				+0x3488
			
 
				+// 0.847203
			
 
				+0x3ac7
			
 
				+// 0.720735
			
 
				+0x39c4
			
 
				+// 0.477014
			
 
				+0x37a2
			
 
				+// 0.633504
			
 
				+0x3911
			
 
				+// 0.596646
			
 
				+0x38c6
			
 
				+// 0.832505
			
 
				+0x3aa9
			
 
				+// 0.135769
			
 
				+0x3058
			
 
				+// 0.607454
			
 
				+0x38dc
			
 
				+// 0.914942
			
 
				+0x3b52
			
 
				+// 0.474766
			
 
				+0x3799
			
 
				+// 0.781271
			
 
				+0x3a40
			
 
				+// 0.084347
			
 
				+0x2d66
			
 
				+// 0.038805
			
 
				+0x28f8
			
 
				+// 0.055045
			
 
				+0x2b0c
			
 
				+// 0.334330
			
 
				+0x3559
			
 
				+// 0.331283
			
 
				+0x354d
			
 
				+// 0.595124
			
 
				+0x38c3
			
 
				+// 0.443636
			
 
				+0x3719
			
 
				+// 0.612722
			
 
				+0x38e7
			
 
				+// 0.152437
			
 
				+0x30e1
			
 
				+// 0.311869
			
 
				+0x34fd
			
 
				+// 0.177173
			
 
				+0x31ab
			
 
				+// 0.540216
			
 
				+0x3852
			
 
				+// 0.941623
			
 
				+0x3b88
			
 
				+// 0.666747
			
 
				+0x3955
			
 
				+// 0.002608
			
 
				+0x1958
			
 
				+// 0.092911
			
 
				+0x2df2
			
 
				+// 0.129786
			
 
				+0x3027
			
 
				+// 0.710137
			
 
				+0x39ae
			
 
				+// 0.488728
			
 
				+0x37d2
			
 
				+// 0.478723
			
 
				+0x37a9
			
 
				+// 0.627602
			
 
				+0x3905
			
 
				+// 0.001795
			
 
				+0x175b
			
 
				+// 0.299980
			
 
				+0x34cd
			
 
				+// 0.636209
			
 
				+0x3917
			
 
				+// 0.102662
			
 
				+0x2e92
			
--- a/Testing/Source/Tests/BIQUADF16.cpp
+++ b/Testing/Source/Tests/BIQUADF16.cpp
@@ -10,8 +10,8 @@ Reference patterns are generated with
 
				 a double precision computation.
			
 
				 
			
 
				 */
			
 
				-#define REL_ERROR (3.0e-3)
			
 
				-#define ABS_ERROR (3.5e-2)
			
 
				+#define REL_ERROR (5.0e-2)
			
 
				+#define ABS_ERROR (1.0e-1)
			
 
				 
			
 
				     void BIQUADF16::test_biquad_cascade_df1_ref()
			
 
				     {
			
--- a/Testing/Source/Tests/DistanceTestsF16.cpp
+++ b/Testing/Source/Tests/DistanceTestsF16.cpp
@@ -3,7 +3,7 @@
 
				 #include "Error.h"
			
 
				 #include "Test.h"
			
 
				 
			
 
				-#define REL_ERROR (2e-3)
			
 
				+#define REL_ERROR (5e-3)
			
 
				 
			
 
				 #define REL_JS_ERROR (3e-2)
			
 
				 
			
--- a/Testing/Source/Tests/SupportTestsF16.cpp
+++ b/Testing/Source/Tests/SupportTestsF16.cpp
@@ -7,8 +7,8 @@
 
				 #define SNR_THRESHOLD 120
			
 
				 #define REL_ERROR (1.0e-5)
			
 
				 
			
 
				-#define ABS_WEIGHTEDSUM_ERROR (5.0e-2)
			
 
				-#define REL_WEIGHTEDSUM_ERROR (5.0e-2)
			
 
				+#define ABS_WEIGHTEDSUM_ERROR (1.0e-1)
			
 
				+#define REL_WEIGHTEDSUM_ERROR (5.0e-3)
			
 
				 
			
 
				 #define ABS_ERROR_F32 (1.0e-3)
			
 
				 #define REL_ERROR_F32 (1.0e-3)
			
@@ -28,7 +28,7 @@ void SupportTestsF16::test_weighted_sum_f16()
 
				  
			
 
				  
			
 
				  *outp=arm_weighted_sum_f16(inp, coefsp,this->nbSamples);
			
 
				- 
			
 
				+
			
 
				  ASSERT_CLOSE_ERROR(*outp,refp[this->offset],ABS_WEIGHTEDSUM_ERROR,REL_WEIGHTEDSUM_ERROR);
			
 
				  ASSERT_EMPTY_TAIL(output);
			
 
				 
			
--- a/Testing/bench.txt
+++ b/Testing/bench.txt
@@ -754,7 +754,7 @@ group Root {
 
				                 }
			
 
				 
			
 
				                 Functions {
			
 
				-                   Cascaded BiQuad Filter DF2T_f64:test_biquad_cascade_df2T_f64
			
 
				+                   Cascaded BiQuad Filter DF2T:test_biquad_cascade_df2T_f64
			
 
				                 } -> PARAM1_ID
			
 
				               }
			
 
				            }
			
@@ -1570,8 +1570,8 @@ group Root {
 
				                    Complex FFT:test_cfft_f32 -> CFFT_PARAM_ID
			
 
				                    Real FFT:test_rfft_f32 -> RFFT_PARAM_ID
			
 
				                    DCT4:test_dct4_f32 -> DCT_PARAM_ID
			
 
				-                   Radix 4 Complex FFT:test_cfft_radix4_f32 -> CFFT4_PARAM_ID
			
 
				-                   Radix 2 Complex FFT:test_cfft_radix2_f32 -> CFFT_PARAM_ID
			
 
				+                   Scalar Radix 4 Complex FFT:test_cfft_radix4_f32 -> CFFT4_PARAM_ID
			
 
				+                   Scalar Radix 2 Complex FFT:test_cfft_radix2_f32 -> CFFT_PARAM_ID
			
 
				                 }
			
 
				            }
			
 
				 
			
@@ -1620,8 +1620,8 @@ group Root {
 
				                    Complex FFT:test_cfft_q31 -> CFFT_PARAM_ID
			
 
				                    Real FFT:test_rfft_q31 -> RFFT_PARAM_ID
			
 
				                    DCT4:test_dct4_q31 -> DCT_PARAM_ID
			
 
				-                   Radix 4 Complex FFT:test_cfft_radix4_q31 -> CFFT4_PARAM_ID
			
 
				-                   Radix 2 Complex FFT:test_cfft_radix2_q31 -> CFFT_PARAM_ID
			
 
				+                   Scalar Radix 4 Complex FFT:test_cfft_radix4_q31 -> CFFT4_PARAM_ID
			
 
				+                   Scalar Radix 2 Complex FFT:test_cfft_radix2_q31 -> CFFT_PARAM_ID
			
 
				                 }
			
 
				            }
			
 
				 
			
@@ -1670,8 +1670,8 @@ group Root {
 
				                    Complex FFT:test_cfft_q15 -> CFFT_PARAM_ID
			
 
				                    Real FFT:test_rfft_q15 -> RFFT_PARAM_ID
			
 
				                    DCT4:test_dct4_q15 -> DCT_PARAM_ID
			
 
				-                   Radix 4 Complex FFT:test_cfft_radix4_q15 -> CFFT4_PARAM_ID
			
 
				-                   Radix 2 Complex FFT:test_cfft_radix2_q15 -> CFFT_PARAM_ID
			
 
				+                   Scalar Radix 4 Complex FFT:test_cfft_radix4_q15 -> CFFT4_PARAM_ID
			
 
				+                   Scalar Radix 2 Complex FFT:test_cfft_radix2_q15 -> CFFT_PARAM_ID
			
 
				                 }
			
 
				            }
			
 
				         }
			
--- a/Testing/bench_f16.txt
+++ b/Testing/bench_f16.txt
@@ -584,8 +584,8 @@ group Root {
 
				                 Functions {
			
 
				                    Complex FFT:test_cfft_f16 -> CFFT_PARAM_ID
			
 
				                    Real FFT:test_rfft_f16 -> RFFT_PARAM_ID
			
 
				-                   Radix 4 Complex FFT:test_cfft_radix4_f16 -> CFFT4_PARAM_ID
			
 
				-                   Radix 2 Complex FFT:test_cfft_radix2_f16 -> CFFT_PARAM_ID
			
 
				+                   Scalar Radix 4 Complex FFT:test_cfft_radix4_f16 -> CFFT4_PARAM_ID
			
 
				+                   Scalar Radix 2 Complex FFT:test_cfft_radix2_f16 -> CFFT_PARAM_ID
			
 
				                 }
			
 
				            }