arm_max_q7.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_max_q7.c
  4. * Description: Maximum value of a Q7 vector
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/statistics_functions.h"
  29. /**
  30. @ingroup groupStats
  31. */
  32. /**
  33. @addtogroup Max
  34. @{
  35. */
  36. /**
  37. @brief Maximum value of a Q7 vector.
  38. @param[in] pSrc points to the input vector
  39. @param[in] blockSize number of samples in input vector
  40. @param[out] pResult maximum value returned here
  41. @param[out] pIndex index of maximum value returned here
  42. @return none
  43. */
  44. #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
  45. #include "arm_helium_utils.h"
  46. static void arm_small_blk_max_q7(
  47. const q7_t * pSrc,
  48. uint16_t blockSize,
  49. q7_t * pResult,
  50. uint32_t * pIndex)
  51. {
  52. int32_t blkCnt; /* loop counters */
  53. q7x16_t extremValVec = vdupq_n_s8(Q7_MIN);
  54. q7_t maxValue = Q7_MIN;
  55. uint8x16_t indexVec;
  56. uint8x16_t extremIdxVec;
  57. mve_pred16_t p0;
  58. uint8_t extremIdxArr[16];
  59. indexVec = vidupq_u8(0U, 1);
  60. blkCnt = blockSize;
  61. do {
  62. mve_pred16_t p = vctp8q(blkCnt);
  63. q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p);
  64. /*
  65. * Get current max per lane and current index per lane
  66. * when a max is selected
  67. */
  68. p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
  69. extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
  70. /* store per-lane extrema indexes */
  71. vst1q_p_u8(extremIdxArr, indexVec, p0);
  72. indexVec += 16;
  73. pSrc += 16;
  74. blkCnt -= 16;
  75. }
  76. while (blkCnt > 0);
  77. /* Get max value across the vector */
  78. maxValue = vmaxvq(maxValue, extremValVec);
  79. /* set index for lower values to max possible index */
  80. p0 = vcmpgeq(extremValVec, maxValue);
  81. extremIdxVec = vld1q_u8(extremIdxArr);
  82. indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
  83. *pIndex = vminvq_u8(blockSize - 1, indexVec);
  84. *pResult = maxValue;
  85. }
  86. void arm_max_q7(
  87. const q7_t * pSrc,
  88. uint32_t blockSize,
  89. q7_t * pResult,
  90. uint32_t * pIndex)
  91. {
  92. int32_t totalSize = blockSize;
  93. const uint16_t sub_blk_sz = UINT8_MAX + 1;
  94. if (totalSize <= sub_blk_sz)
  95. {
  96. arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
  97. }
  98. else
  99. {
  100. uint32_t curIdx = 0;
  101. q7_t curBlkExtr = Q7_MIN;
  102. uint32_t curBlkPos = 0;
  103. uint32_t curBlkIdx = 0;
  104. /*
  105. * process blocks of 255 elts
  106. */
  107. while (totalSize >= sub_blk_sz)
  108. {
  109. const q7_t *curSrc = pSrc;
  110. arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
  111. if (*pResult > curBlkExtr)
  112. {
  113. /*
  114. * update partial extrema
  115. */
  116. curBlkExtr = *pResult;
  117. curBlkPos = *pIndex;
  118. curBlkIdx = curIdx;
  119. }
  120. curIdx++;
  121. pSrc += sub_blk_sz;
  122. totalSize -= sub_blk_sz;
  123. }
  124. /*
  125. * remainder
  126. */
  127. arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex);
  128. if (*pResult > curBlkExtr)
  129. {
  130. curBlkExtr = *pResult;
  131. curBlkPos = *pIndex;
  132. curBlkIdx = curIdx;
  133. }
  134. *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
  135. *pResult = curBlkExtr;
  136. }
  137. }
  138. #else
  139. void arm_max_q7(
  140. const q7_t * pSrc,
  141. uint32_t blockSize,
  142. q7_t * pResult,
  143. uint32_t * pIndex)
  144. {
  145. q7_t maxVal, out; /* Temporary variables to store the output value. */
  146. uint32_t blkCnt, outIndex; /* Loop counter */
  147. #if defined (ARM_MATH_LOOPUNROLL)
  148. uint32_t index; /* index of maximum value */
  149. #endif
  150. /* Initialise index value to zero. */
  151. outIndex = 0U;
  152. /* Load first input value that act as reference value for comparision */
  153. out = *pSrc++;
  154. #if defined (ARM_MATH_LOOPUNROLL)
  155. /* Initialise index of maximum value. */
  156. index = 0U;
  157. /* Loop unrolling: Compute 4 outputs at a time */
  158. blkCnt = (blockSize - 1U) >> 2U;
  159. while (blkCnt > 0U)
  160. {
  161. /* Initialize maxVal to next consecutive values one by one */
  162. maxVal = *pSrc++;
  163. /* compare for the maximum value */
  164. if (out < maxVal)
  165. {
  166. /* Update the maximum value and it's index */
  167. out = maxVal;
  168. outIndex = index + 1U;
  169. }
  170. maxVal = *pSrc++;
  171. if (out < maxVal)
  172. {
  173. out = maxVal;
  174. outIndex = index + 2U;
  175. }
  176. maxVal = *pSrc++;
  177. if (out < maxVal)
  178. {
  179. out = maxVal;
  180. outIndex = index + 3U;
  181. }
  182. maxVal = *pSrc++;
  183. if (out < maxVal)
  184. {
  185. out = maxVal;
  186. outIndex = index + 4U;
  187. }
  188. index += 4U;
  189. /* Decrement loop counter */
  190. blkCnt--;
  191. }
  192. /* Loop unrolling: Compute remaining outputs */
  193. blkCnt = (blockSize - 1U) % 4U;
  194. #else
  195. /* Initialize blkCnt with number of samples */
  196. blkCnt = (blockSize - 1U);
  197. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  198. while (blkCnt > 0U)
  199. {
  200. /* Initialize maxVal to the next consecutive values one by one */
  201. maxVal = *pSrc++;
  202. /* compare for the maximum value */
  203. if (out < maxVal)
  204. {
  205. /* Update the maximum value and it's index */
  206. out = maxVal;
  207. outIndex = blockSize - blkCnt;
  208. }
  209. /* Decrement loop counter */
  210. blkCnt--;
  211. }
  212. /* Store the maximum value and it's index into destination pointers */
  213. *pResult = out;
  214. *pIndex = outIndex;
  215. }
  216. #endif /* defined(ARM_MATH_MVEI) */
  217. /**
  218. @} end of Max group
  219. */