arm_absmax_f32.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_absmax_f32.c
  4. * Description: Maximum value of absolute values of a floating-point vector
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/statistics_functions.h"
  29. #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
  30. #include <limits.h>
  31. #endif
  32. /**
  33. @ingroup groupStats
  34. */
  35. /**
  36. @defgroup AbsMax Absolute Maximum
  37. Computes the maximum value of absolute values of an array of data.
  38. The function returns both the maximum value and its position within the array.
  39. There are separate functions for floating-point, Q31, Q15, and Q7 data types.
  40. */
  41. /**
  42. @addtogroup AbsMax
  43. @{
  44. */
  45. /**
  46. @brief Maximum value of absolute values of a floating-point vector.
  47. @param[in] pSrc points to the input vector
  48. @param[in] blockSize number of samples in input vector
  49. @param[out] pResult maximum value returned here
  50. @param[out] pIndex index of maximum value returned here
  51. @return none
  52. */
  53. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  54. #include "arm_helium_utils.h"
  55. void arm_absmax_f32(
  56. const float32_t * pSrc,
  57. uint32_t blockSize,
  58. float32_t * pResult,
  59. uint32_t * pIndex)
  60. {
  61. int32_t blkSize = blockSize;
  62. f32x4_t vecSrc;
  63. f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN);
  64. float32_t maxValue = F32_ABSMIN;
  65. uint32_t idx = blockSize;
  66. uint32x4_t indexVec;
  67. uint32x4_t curExtremIdxVec;
  68. uint32_t curIdx = 0;
  69. mve_pred16_t p0;
  70. indexVec = vidupq_wb_u32(&curIdx, 1);
  71. curExtremIdxVec = vdupq_n_u32(0);
  72. do {
  73. mve_pred16_t p = vctp32q(blkSize);
  74. vecSrc = vldrwq_z_f32((float32_t const *) pSrc, p);
  75. vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p);
  76. /*
  77. * Get current max per lane and current index per lane
  78. * when a max is selected
  79. */
  80. p0 = vcmpgeq_m(vecSrc, curExtremValVec, p);
  81. curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
  82. curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
  83. /* Does TP detection works here ?? */
  84. indexVec = vidupq_wb_u32(&curIdx, 1);
  85. blkSize -= 4;
  86. pSrc += 4;
  87. }
  88. while (blkSize > 0);
  89. /*
  90. * Get max value across the vector
  91. */
  92. maxValue = vmaxnmvq(maxValue, curExtremValVec);
  93. /*
  94. * set index for lower values to max possible index
  95. */
  96. p0 = vcmpgeq(curExtremValVec, maxValue);
  97. indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
  98. /*
  99. * Get min index which is thus for a max value
  100. */
  101. idx = vminvq(idx, indexVec);
  102. /*
  103. * Save result
  104. */
  105. *pIndex = idx;
  106. *pResult = maxValue;
  107. }
  108. #else
  109. #if defined(ARM_MATH_LOOPUNROLL)
  /*
   * Scalar variant with the main loop unrolled by four.
   *
   * The first sample seeds the running maximum; every following sample
   * replaces it only when its absolute value is strictly larger, so the
   * lowest index of the maximum is reported.
   *
   * An empty vector (blockSize == 0) is handled explicitly: nothing is read
   * from pSrc and the result is 0.0f (no absolute value can be smaller) at
   * index 0.
   *
   * pSrc      points to the input vector
   * blockSize number of samples in the input vector
   * pResult   receives the maximum absolute value
   * pIndex    receives the index of that value
   */
  void arm_absmax_f32(
    const float32_t * pSrc,
          uint32_t blockSize,
          float32_t * pResult,
          uint32_t * pIndex)
  {
      float32_t cur_absmax, out;   /* Current candidate and running maximum */
      uint32_t blkCnt, outIndex;   /* Loop counter and index of the running maximum */
      uint32_t index;              /* Index of the last sample handled by the unrolled loop */

      /* Without this guard (blockSize - 1U) would wrap around and the loops
         below would read far beyond the input vector. */
      if (blockSize == 0U)
      {
          *pResult = 0.0f;
          *pIndex = 0U;
          return;
      }

      /* Initialize index value to zero. */
      outIndex = 0U;
      /* Load first input value that acts as reference value for comparison */
      out = *pSrc++;
      out = (out > 0.0f) ? out : -out;
      /* Initialize index of extrema value. */
      index = 0U;

      /* Loop unrolling: Compute 4 outputs at a time */
      blkCnt = (blockSize - 1U) >> 2U;

      while (blkCnt > 0U)
      {
          /* Initialize cur_absmax to next consecutive values one by one */
          cur_absmax = *pSrc++;
          cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
          /* compare for the extrema value */
          if (cur_absmax > out)
          {
              /* Update the extrema value and its index */
              out = cur_absmax;
              outIndex = index + 1U;
          }

          cur_absmax = *pSrc++;
          cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
          if (cur_absmax > out)
          {
              out = cur_absmax;
              outIndex = index + 2U;
          }

          cur_absmax = *pSrc++;
          cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
          if (cur_absmax > out)
          {
              out = cur_absmax;
              outIndex = index + 3U;
          }

          cur_absmax = *pSrc++;
          cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
          if (cur_absmax > out)
          {
              out = cur_absmax;
              outIndex = index + 4U;
          }

          index += 4U;

          /* Decrement loop counter */
          blkCnt--;
      }

      /* Loop unrolling: Compute remaining outputs */
      blkCnt = (blockSize - 1U) % 4U;

      while (blkCnt > 0U)
      {
          cur_absmax = *pSrc++;
          cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
          if (cur_absmax > out)
          {
              out = cur_absmax;
              /* blkCnt samples are left, so this one sits at blockSize - blkCnt */
              outIndex = blockSize - blkCnt;
          }

          /* Decrement loop counter */
          blkCnt--;
      }

      /* Store the extrema value and its index into destination pointers */
      *pResult = out;
      *pIndex = outIndex;
  }
  196. #else
  /*
   * Plain scalar variant.
   *
   * The first sample seeds the running maximum; every following sample
   * replaces it only when its absolute value is strictly larger, so the
   * lowest index of the maximum is reported.
   *
   * An empty vector (blockSize == 0) is handled explicitly: nothing is read
   * from pSrc and the result is 0.0f (no absolute value can be smaller) at
   * index 0.
   *
   * pSrc      points to the input vector
   * blockSize number of samples in the input vector
   * pResult   receives the maximum absolute value
   * pIndex    receives the index of that value
   */
  void arm_absmax_f32(
    const float32_t * pSrc,
          uint32_t blockSize,
          float32_t * pResult,
          uint32_t * pIndex)
  {
      float32_t maxVal, out;       /* Current candidate and running maximum */
      uint32_t blkCnt, outIndex;   /* Loop counter and index of the running maximum */

      /* Without this guard (blockSize - 1U) would wrap around and the loop
         below would read far beyond the input vector. */
      if (blockSize == 0U)
      {
          *pResult = 0.0f;
          *pIndex = 0U;
          return;
      }

      /* Initialise index value to zero. */
      outIndex = 0U;
      /* Load first input value that acts as reference value for comparison */
      out = fabsf(*pSrc++);

      /* Initialize blkCnt with the number of samples still to be compared */
      blkCnt = (blockSize - 1U);

      while (blkCnt > 0U)
      {
          /* Initialize maxVal to the next consecutive values one by one */
          maxVal = fabsf(*pSrc++);

          /* compare for the maximum value */
          if (out < maxVal)
          {
              /* Update the maximum value and its index */
              out = maxVal;
              /* blkCnt samples are left, so this one sits at blockSize - blkCnt */
              outIndex = blockSize - blkCnt;
          }

          /* Decrement loop counter */
          blkCnt--;
      }

      /* Store the maximum value and its index into destination pointers */
      *pResult = out;
      *pIndex = outIndex;
  }
  229. #endif /* defined(ARM_MATH_LOOPUNROLL) */
  230. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  231. /**
  232. @} end of AbsMax group
  233. */