arm_weighted_sum_f32.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_weighted_sum_f32.c
  4. * Description: Weighted Sum
  5. *
  6. *
  7. * Target Processor: Cortex-M and Cortex-A cores
  8. * -------------------------------------------------------------------- */
  9. /*
  10. * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  11. *
  12. * SPDX-License-Identifier: Apache-2.0
  13. *
  14. * Licensed under the Apache License, Version 2.0 (the License); you may
  15. * not use this file except in compliance with the License.
  16. * You may obtain a copy of the License at
  17. *
  18. * www.apache.org/licenses/LICENSE-2.0
  19. *
  20. * Unless required by applicable law or agreed to in writing, software
  21. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  22. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23. * See the License for the specific language governing permissions and
  24. * limitations under the License.
  25. */
  26. #include "arm_math.h"
  27. #include <limits.h>
  28. #include <math.h>
  29. /**
  30. * @addtogroup groupSupport
  31. * @{
  32. */
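  /**
   * @brief Weighted sum
   *
   * Computes sum(in[i] * weigths[i]) / sum(weigths[i]) over blockSize samples,
   * i.e. the sum of the weighted inputs normalized by the sum of the weights.
   *
   * @param[in] *in Array of input values.
   * @param[in] *weigths Array of weights, one per input value.
   * @param[in] blockSize Number of samples in the input array.
   * @return Sum of the weighted inputs divided by the sum of the weights.
   *
   */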
  43. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  44. #include "arm_helium_utils.h"
  45. float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
  46. {
  47. float32_t accum1, accum2;
  48. f32x4_t accum1V, accum2V;
  49. f32x4_t inV, wV;
  50. const float32_t *pIn, *pW;
  51. uint32_t blkCnt;
  52. pIn = in;
  53. pW = weigths;
  54. accum1V = vdupq_n_f32(0.0);
  55. accum2V = vdupq_n_f32(0.0);
  56. blkCnt = blockSize >> 2;
  57. while (blkCnt > 0)
  58. {
  59. inV = vld1q(pIn);
  60. wV = vld1q(pW);
  61. pIn += 4;
  62. pW += 4;
  63. accum1V = vfmaq(accum1V, inV, wV);
  64. accum2V = vaddq(accum2V, wV);
  65. blkCnt--;
  66. }
  67. accum1 = vecAddAcrossF32Mve(accum1V);
  68. accum2 = vecAddAcrossF32Mve(accum2V);
  69. blkCnt = blockSize & 3;
  70. while(blkCnt > 0)
  71. {
  72. accum1 += *pIn++ * *pW;
  73. accum2 += *pW++;
  74. blkCnt--;
  75. }
  76. return (accum1 / accum2);
  77. }
  78. #else
  79. #if defined(ARM_MATH_NEON)
  80. #include "NEMath.h"
  81. float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
  82. {
  83. float32_t accum1, accum2;
  84. float32x4_t accum1V, accum2V;
  85. float32x2_t tempV;
  86. float32x4_t inV,wV;
  87. const float32_t *pIn, *pW;
  88. uint32_t blkCnt;
  89. pIn = in;
  90. pW = weigths;
  91. accum1=0.0f;
  92. accum2=0.0f;
  93. accum1V = vdupq_n_f32(0.0f);
  94. accum2V = vdupq_n_f32(0.0f);
  95. blkCnt = blockSize >> 2;
  96. while(blkCnt > 0)
  97. {
  98. inV = vld1q_f32(pIn);
  99. wV = vld1q_f32(pW);
  100. pIn += 4;
  101. pW += 4;
  102. accum1V = vmlaq_f32(accum1V,inV,wV);
  103. accum2V = vaddq_f32(accum2V,wV);
  104. blkCnt--;
  105. }
  106. tempV = vpadd_f32(vget_low_f32(accum1V),vget_high_f32(accum1V));
  107. accum1 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
  108. tempV = vpadd_f32(vget_low_f32(accum2V),vget_high_f32(accum2V));
  109. accum2 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
  110. blkCnt = blockSize & 3;
  111. while(blkCnt > 0)
  112. {
  113. accum1 += *pIn++ * *pW;
  114. accum2 += *pW++;
  115. blkCnt--;
  116. }
  117. return(accum1 / accum2);
  118. }
  119. #else
  120. float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, uint32_t blockSize)
  121. {
  122. float32_t accum1, accum2;
  123. const float32_t *pIn, *pW;
  124. uint32_t blkCnt;
  125. pIn = in;
  126. pW = weigths;
  127. accum1=0.0f;
  128. accum2=0.0f;
  129. blkCnt = blockSize;
  130. while(blkCnt > 0)
  131. {
  132. accum1 += *pIn++ * *pW;
  133. accum2 += *pW++;
  134. blkCnt--;
  135. }
  136. return(accum1 / accum2);
  137. }
  138. #endif
  139. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  140. /**
  141. * @} end of groupSupport group
  142. */