Просмотр исходного кода

CMSIS-NN : Revert update to arm_softmax_q7()

To avoid accuracy issues caused by earlier commits ede5bd3e2 and
cebb96c3f, the changes are reverted.

Change-Id: I59977d130486710fa569b499a71d11f2fee74fa3
Felix Johnny 5 лет назад
Родитель
Commit
9cb4c17e6f
1 изменённый файл: 5 добавлений и 161 удаление
  1. 5 161
      CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c

+ 5 - 161
CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c

@@ -21,7 +21,7 @@
  * Title:        arm_softmax_q7.c
  * Description:  Q7 softmax function
  *
- * $Date:        February 27, 2020
+ * $Date:        June 8, 2020
  * $Revision:    V.1.0.1
  *
  * Target Processor:  Cortex-M cores
@@ -57,49 +57,17 @@
    *  But mathematically, the gradient will be the same
    *  with a log(2) scaling factor.
    *
-   *  If we compare the position of the max value in output of this
-   *  function with a reference float32 softmax (and thus using exp)
-   *  we see that the position of the max value is sometimes different.
-   *
-   *  If we do statistics on lot of input vectors we can compute
-   *  an average error rate in percent. It is the percent of time
-   *  that the max will be at a position different from the one
-   *  computed with a reference float32 implementation.
-   *
-   *  This average error rate is dependent on the vector size.
-   *  We have:
-   *
-   *  Average error rate in percent = -0.555548 + 0.246918 dim_vec
-   *  Variance of the error rate = -0.0112281 + 0.0382476 dim_vec
-   *
-   *
    */
 
-#define Q7BITS 8
-#define LOG2Q7BITS 3
-
 void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out )
 {
-#if defined (ARM_MATH_DSP)
     q31_t     sum;
     int16_t   i;
     uint8_t   shift;
     q15_t     base;
-    uint16_t blkCnt;
-
-    q31_t in,in1,in2;
-    q31_t out1, out2;
-
-    q31_t baseV;
-    q31_t shiftV;
-    const q31_t pad=0x0d0d0d0d;
-    const q7_t *pIn=vec_in;
-
     base = -128;
 
-
     /* We first search for the maximum */
-
     for (i = 0; i < dim_vec; i++)
     {
         if (vec_in[i] > base)
@@ -108,157 +76,33 @@ void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out )
         }
     }
 
-
     /*
      * So the base is set to max-8, meaning
      * that we ignore really small values.
      * anyway, they will be 0 after shrinking to q7_t.
      */
-    base = base - Q7BITS;
-    baseV = ((base & 0x0FF) << 24) | ((base & 0x0FF) << 16) | ((base & 0x0FF) << 8) | ((base & 0x0FF));
+    base = base - (1 << 3);
 
     sum = 0;
 
-    blkCnt = dim_vec >> 2;
-
-    while(blkCnt)
-    {
-       in=arm_nn_read_q7x4_ia(&pIn);
-       in=__SSUB8(in,baseV);
-
-        in1 = __SXTB16(__ROR(in, 8));
-
-        /* extend remaining two q7_t values to q15_t values */
-        in2 = __SXTB16(in);
-
-#ifndef ARM_MATH_BIG_ENDIAN
-        out2 = __PKHTB(in1, in2, 16);
-        out1 = __PKHBT(in2, in1, 16);
-#else
-        out1 = __PKHTB(in1, in2, 16);
-        out2 = __PKHBT(in2, in1, 16);
-#endif
-
-
-       shiftV = __USAT16(out1,LOG2Q7BITS);
-       sum += 0x1 << (shiftV & 0x0FF);
-       sum += 0x1 << ((shiftV >> 16) & 0x0FF);
-
-       shiftV = __USAT16(out2,LOG2Q7BITS);
-       sum += 0x1 << (shiftV & 0x0FF);
-       sum += 0x1 << ((shiftV >> 16) & 0x0FF);
-
-       blkCnt--;
-    }
-
-    blkCnt = dim_vec & 3;
-
-    while(blkCnt)
-    {
-       shift = (uint8_t)__USAT(*pIn++ - base, LOG2Q7BITS);
-       sum += 0x1 << shift;
-       blkCnt--;
-    }
-
-
-    /* This is effectively (0x1 << 20) / sum */
-    int output_base = (1 << 20) / sum;
-
-
-    pIn=vec_in;
-
-    blkCnt = dim_vec >> 2;
-    while(blkCnt)
-    {
-
-        /* Here minimum value of 13+base-vec_in[i] will be 5 */
-        in=arm_nn_read_q7x4_ia(&pIn);
-        in=__SSUB8(pad,in);
-        in=__SADD8(in,baseV);
-
-        in1 = __SXTB16(__ROR(in, 8));
-
-        /* extend remaining two q7_t values to q15_t values */
-        in2 = __SXTB16(in);
-
-#ifndef ARM_MATH_BIG_ENDIAN
-        out2 = __PKHTB(in1, in2, 16);
-        out1 = __PKHBT(in2, in1, 16);
-#else
-        out1 = __PKHTB(in1, in2, 16);
-        out2 = __PKHBT(in2, in1, 16);
-#endif
-
-        shiftV = __USAT16(out1,5);
-        *p_out++ = (q7_t) __SSAT((output_base >> (shiftV & 0x0FF)), 8);
-        *p_out++ = (q7_t) __SSAT((output_base >> ((shiftV >> 16) & 0x0FF)), 8);
-
-        shiftV = __USAT16(out2,5);
-        *p_out++ = (q7_t) __SSAT((output_base >> (shiftV & 0x0FF)), 8);
-        *p_out++ = (q7_t) __SSAT((output_base >> ((shiftV >> 16) & 0x0FF)), 8);
-
-        blkCnt --;
-    }
-
-
-    blkCnt = dim_vec & 3;
-    while(blkCnt)
-    {
-
-        /* Here minimum value of 13+base-vec_in[i] will be 5 */
-        shift = (uint8_t)__USAT(13 + base - *pIn++, 5);
-        *p_out++ = (q7_t) __SSAT((output_base >> shift), 8);
-
-        blkCnt --;
-    }
-#else
-    q31_t     sum;
-    int16_t   i;
-    uint8_t   shift;
-    q15_t     base;
-
-    base = -128;
-
-    /* We first search for the maximum */
-
     for (i = 0; i < dim_vec; i++)
     {
-        if (vec_in[i] > base)
-        {
-            base = vec_in[i];
-        }
-    }
-
-
-    /*
-     * So the base is set to max-8, meaning
-     * that we ignore really small values.
-     * anyway, they will be 0 after shrinking to q7_t.
-     */
-    base = base - Q7BITS;
-
-    sum = 0;
-
-    for (i = 0; i < dim_vec; i++)
-    {
-        shift = (uint8_t)__USAT(vec_in[i] - base, LOG2Q7BITS);
+        shift = (uint8_t)__USAT(vec_in[i] - base, 3);
         sum += 0x1 << shift;
     }
 
     /* This is effectively (0x1 << 20) / sum */
     int output_base = (1 << 20) / sum;
 
-
     for (i = 0; i < dim_vec; i++)
     {
 
         /* Here minimum value of 13+base-vec_in[i] will be 5 */
         shift = (uint8_t)__USAT(13 + base - vec_in[i], 5);
-        p_out[i] = (q7_t) __SSAT((output_base >> shift), 8);
-
+        p_out[i] = (q7_t)__SSAT((output_base >> shift), 8);
     }
-#endif
 }
+
 /**
  * @} end of Softmax group
  */