Sfoglia il codice sorgente

CMSIS-DSP: Remove type punning in vector code and fix some compilation warnings.

Christophe Favergeon 4 anni fa
parent
commit
ac7da660b7

+ 1 - 2
Source/DistanceFunctions/arm_minkowski_distance_f16.c

@@ -69,10 +69,9 @@
 float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
 {
     uint32_t        blkCnt;
-    f16x8_t         a, b, tmpV, accumV, sumV;
+    f16x8_t         a, b, tmpV, sumV;
 
     sumV = vdupq_n_f16(0.0f);
-    accumV = vdupq_n_f16(0.0f);
 
     blkCnt = blockSize >> 3;
     while (blkCnt > 0U) {

+ 1 - 2
Source/DistanceFunctions/arm_minkowski_distance_f32.c

@@ -76,10 +76,9 @@ __attribute__((weak)) float __powisf2(float a, int b)
 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
 {
     uint32_t        blkCnt;
-    f32x4_t         a, b, tmpV, accumV, sumV;
+    f32x4_t         a, b, tmpV, sumV;
 
     sumV = vdupq_n_f32(0.0f);
-    accumV = vdupq_n_f32(0.0f);
 
     blkCnt = blockSize >> 2;
     while (blkCnt > 0U) {

+ 3 - 3
Source/FilteringFunctions/arm_fir_interpolate_f32.c

@@ -147,7 +147,7 @@ static void arm_fir_interpolate2_f32_mve(
     uint32_t  blkCnt;           /* Loop counters */
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * 2, 2 * 2, 3 * 2 };
-    uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+    uint32x4_t vec_strides0 = vld1q_u32(strides);
     uint32x4_t vec_strides1 = vec_strides0 + 1;
     f32x4_t acc0, acc1;
 
@@ -271,8 +271,8 @@ void arm_fir_interpolate_f32(
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
     uint32_t  stridesM[4] = { 4, 3, 2, 1 };
-    uint32x4_t vec_stridesM = *(uint32x4_t *) stridesM;
-    uint32x4_t vec_strides = *(uint32x4_t *) strides;
+    uint32x4_t vec_stridesM =  vld1q_u32(stridesM);
+    uint32x4_t vec_strides =  vld1q_u32(strides);
     f32x4_t acc;
 
 

+ 1 - 1
Source/FilteringFunctions/arm_fir_interpolate_q31.c

@@ -71,7 +71,7 @@ void arm_fir_interpolate_q31(
     uint32_t  i, blkCnt;        /* Loop counters */
     uint16_t  phaseLen = S->phaseLength;    /* Length of each polyphase filter component */
     uint32_t  strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
-    uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+    uint32x4_t vec_strides0 =  vld1q_u32(strides);
     uint32x4_t vec_strides1 = vec_strides0 + 1;
     uint32x4_t vec_strides2 = vec_strides0 + 2;
     uint32x4_t vec_strides3 = vec_strides0 + 3;

+ 1 - 0
Source/FilteringFunctions/arm_levinson_durbin_q31.c

@@ -75,6 +75,7 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d)
 
   // Result is in Q14 because of use of HALF_Q15 instead of ONE_Q15.
   status=arm_divide_q15(HALF_Q15,d>>16,&inverse,&shift);
+  (void)status;
   
   // d is used instead of l
   // So we will need to substract to 2 instead of 1.

+ 1 - 3
Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c

@@ -532,7 +532,7 @@ arm_status arm_mat_cmplx_mult_f32(
     uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t  col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status status;          /* status of matrix multiplication */
     uint32x4_t vecOffs, vecColBOffs;
     uint32_t  blkCnt, rowCnt;           /* loop counters */
@@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_f32(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             float32_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
             float32_t const *pInA0 = pInA;
@@ -752,7 +751,6 @@ arm_status arm_mat_cmplx_mult_f32(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             float32_t const *pSrcA0Vec;
             float32_t const *pInA0 = pInA;

+ 1 - 3
Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c

@@ -526,7 +526,7 @@ arm_status arm_mat_cmplx_mult_q31(
     uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t  col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status status;          /* status of matrix multiplication */
     uint32x4_t vecOffs, vecColBOffs;
     uint32_t  blkCnt, rowCnt;           /* loop counters */
@@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec, *pSrcA1Vec;
             q31_t const *pInA0 = pInA;
@@ -742,7 +741,6 @@ arm_status arm_mat_cmplx_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec;
             q31_t const *pInA0 = pInA;

+ 1 - 3
Source/MatrixFunctions/arm_mat_mult_q15.c

@@ -328,7 +328,7 @@ arm_status arm_mat_mult_q15(
     uint16_t  numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t  numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t  numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t  col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t  col, i = 0U, row = numRowsA;  /* loop counters */
     uint16x8_t vecOffs, vecColBOffs;
     uint32_t  blkCnt,rowCnt;           /* loop counters */
     arm_status status;                             /* Status of matrix multiplication */
@@ -403,7 +403,6 @@ arm_status arm_mat_mult_q15(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q15_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
             q15_t    *pInA0 = pInA;
@@ -519,7 +518,6 @@ arm_status arm_mat_mult_q15(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q15_t const *pSrcA0Vec;
             q15_t    *pInA0 = pInA;

+ 1 - 3
Source/MatrixFunctions/arm_mat_mult_q31.c

@@ -344,7 +344,7 @@ arm_status arm_mat_mult_q31(
     uint16_t    numRowsA = pSrcA->numRows;    /* number of rows of input matrix A    */
     uint16_t    numColsB = pSrcB->numCols;    /* number of columns of input matrix B */
     uint16_t    numColsA = pSrcA->numCols;    /* number of columns of input matrix A */
-    uint16_t    col, i = 0U, row = numRowsA, colCnt;  /* loop counters */
+    uint16_t    col, i = 0U, row = numRowsA;  /* loop counters */
     arm_status  status;          /* status of matrix multiplication */
     uint32x4_t  vecOffs, vecColBOffs;
     uint32_t    blkCnt, rowCnt;           /* loop counters */
@@ -418,7 +418,6 @@ arm_status arm_mat_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
             q31_t const   *pInA0 = pInA;
@@ -541,7 +540,6 @@ arm_status arm_mat_mult_q31(
             /*
              * Matrix A columns number of MAC operations are to be performed
              */
-            colCnt = numColsA;
 
             q31_t const *pSrcA0Vec;
             q31_t const   *pInA0 = pInA;

+ 2 - 2
Source/TransformFunctions/arm_cfft_f16.c

@@ -208,7 +208,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /* load scheduling */
@@ -414,7 +414,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*

+ 2 - 2
Source/TransformFunctions/arm_cfft_f32.c

@@ -210,7 +210,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /* load scheduling */
@@ -416,7 +416,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32 (strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*

+ 2 - 2
Source/TransformFunctions/arm_cfft_q15.c

@@ -157,7 +157,7 @@ static void _arm_radix4_butterfly_q15_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32 (strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -392,7 +392,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*

+ 2 - 2
Source/TransformFunctions/arm_cfft_q31.c

@@ -167,7 +167,7 @@ static void _arm_radix4_butterfly_q31_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*
@@ -417,7 +417,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
     /*
      * start of Last stage process
      */
-    uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+    uint32x4_t vecScGathAddr = vld1q_u32(strides);
     vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
 
     /*

+ 2 - 0
Testing/Source/Tests/FastMathQ15.cpp

@@ -30,6 +30,8 @@ a double precision computation.
           status = arm_divide_q15(nump[i],denp[i],&outp[i],&shiftp[i]);
         }
 
+        (void)status;
+
         ASSERT_SNR(ref,output,(float32_t)SNR_THRESHOLD);
         ASSERT_NEAR_EQ(ref,output,ABS_ERROR);
         ASSERT_EQ(refShift,shift);