arm_absmin_q7.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_absmin_q7.c
  4. * Description: Minimum value of absolute values of a Q7 vector
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/statistics_functions.h"
  29. /**
  30. @ingroup groupStats
  31. */
  32. /**
  33. @addtogroup AbsMin
  34. @{
  35. */
  36. /**
  37. @brief Minimum value of absolute values of a Q7 vector.
  38. @param[in] pSrc points to the input vector
  39. @param[in] blockSize number of samples in input vector
  40. @param[out] pResult minimum value returned here
  41. @param[out] pIndex index of minimum value returned here
  42. */
  43. #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
  44. #include <stdint.h>
  45. #include "arm_helium_utils.h"
  46. #define MAX_BLKSZ_S8 (UINT8_MAX+1)
  47. static void arm_small_blk_absmin_q7(
  48. const q7_t *pSrc,
  49. uint32_t blockSize,
  50. q7_t *pResult,
  51. uint32_t *pIndex)
  52. {
  53. uint16_t blkCnt; /* loop counters */
  54. q7x16_t vecSrc;
  55. q7_t const *pSrcVec;
  56. q7x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMAX);
  57. q7_t minValue = Q7_ABSMAX;
  58. uint16_t idx = blockSize - 1;
  59. uint8x16_t indexVec;
  60. uint8x16_t curExtremIdxVec;
  61. uint32_t startIdx = 0;
  62. mve_pred16_t p0;
  63. indexVec = vidupq_wb_u8(&startIdx, 1);
  64. curExtremIdxVec = vdupq_n_u8(0);
  65. pSrcVec = (q7_t const *) pSrc;
  66. blkCnt = blockSize >> 4;
  67. while (blkCnt > 0U)
  68. {
  69. vecSrc = vld1q(pSrcVec);
  70. pSrcVec += 16;
  71. vecSrc = vabsq(vecSrc);
  72. /*
  73. * Get current min per lane and current index per lane
  74. * when a min is selected
  75. */
  76. p0 = vcmpleq(vecSrc, curExtremValVec);
  77. curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
  78. curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
  79. indexVec = vidupq_wb_u8(&startIdx, 1);
  80. /*
  81. * Decrement the blockSize loop counter
  82. */
  83. blkCnt--;
  84. }
  85. /*
  86. * tail
  87. * (will be merged thru tail predication)
  88. */
  89. blkCnt = blockSize & 0xF;
  90. if (blkCnt > 0U)
  91. {
  92. vecSrc = vld1q(pSrcVec);
  93. pSrcVec += 16;
  94. vecSrc = vabsq(vecSrc);
  95. p0 = vctp8q(blkCnt);
  96. /*
  97. * Get current min per lane and current index per lane
  98. * when a min is selected
  99. */
  100. p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
  101. curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
  102. curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
  103. }
  104. /*
  105. * Get min value across the vector
  106. */
  107. minValue = vminvq(minValue, curExtremValVec);
  108. /*
  109. * set index for lower values to min possible index
  110. */
  111. p0 = vcmpleq(curExtremValVec, minValue);
  112. idx = vminvq_p_u8(idx, curExtremIdxVec, p0);
  113. /*
  114. * Save result
  115. */
  116. *pIndex = idx;
  117. *pResult = minValue;
  118. }
  119. void arm_absmin_q7(
  120. const q7_t * pSrc,
  121. uint32_t blockSize,
  122. q7_t * pResult,
  123. uint32_t * pIndex)
  124. {
  125. int32_t totalSize = blockSize;
  126. if (totalSize <= MAX_BLKSZ_S8)
  127. {
  128. arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
  129. }
  130. else
  131. {
  132. uint32_t curIdx = 0;
  133. q7_t curBlkExtr = Q7_MAX;
  134. uint32_t curBlkPos = 0;
  135. uint32_t curBlkIdx = 0;
  136. /*
  137. * process blocks of 255 elts
  138. */
  139. while (totalSize >= MAX_BLKSZ_S8)
  140. {
  141. const q7_t *curSrc = pSrc;
  142. arm_small_blk_absmin_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex);
  143. if (*pResult < curBlkExtr)
  144. {
  145. /*
  146. * update partial extrema
  147. */
  148. curBlkExtr = *pResult;
  149. curBlkPos = *pIndex;
  150. curBlkIdx = curIdx;
  151. }
  152. curIdx++;
  153. pSrc += MAX_BLKSZ_S8;
  154. totalSize -= MAX_BLKSZ_S8;
  155. }
  156. /*
  157. * remainder
  158. */
  159. arm_small_blk_absmin_q7(pSrc, totalSize, pResult, pIndex);
  160. if (*pResult < curBlkExtr)
  161. {
  162. curBlkExtr = *pResult;
  163. curBlkPos = *pIndex;
  164. curBlkIdx = curIdx;
  165. }
  166. *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
  167. *pResult = curBlkExtr;
  168. }
  169. }
  170. #else
  171. #if defined(ARM_MATH_DSP)
  172. void arm_absmin_q7(
  173. const q7_t * pSrc,
  174. uint32_t blockSize,
  175. q7_t * pResult,
  176. uint32_t * pIndex)
  177. {
  178. q7_t cur_absmin, out; /* Temporary variables to store the output value. */\
  179. uint32_t blkCnt, outIndex; /* Loop counter */ \
  180. uint32_t index; /* index of maximum value */ \
  181. \
  182. /* Initialize index value to zero. */ \
  183. outIndex = 0U; \
  184. /* Load first input value that act as reference value for comparision */ \
  185. out = *pSrc++; \
  186. out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \
  187. /* Initialize index of extrema value. */ \
  188. index = 0U; \
  189. \
  190. /* Loop unrolling: Compute 4 outputs at a time */ \
  191. blkCnt = (blockSize - 1U) >> 2U; \
  192. \
  193. while (blkCnt > 0U) \
  194. { \
  195. /* Initialize cur_absmin to next consecutive values one by one */ \
  196. cur_absmin = *pSrc++; \
  197. cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
  198. /* compare for the extrema value */ \
  199. if (cur_absmin < out) \
  200. { \
  201. /* Update the extrema value and it's index */ \
  202. out = cur_absmin; \
  203. outIndex = index + 1U; \
  204. } \
  205. \
  206. cur_absmin = *pSrc++; \
  207. cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
  208. if (cur_absmin < out) \
  209. { \
  210. out = cur_absmin; \
  211. outIndex = index + 2U; \
  212. } \
  213. \
  214. cur_absmin = *pSrc++; \
  215. cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
  216. if (cur_absmin < out) \
  217. { \
  218. out = cur_absmin; \
  219. outIndex = index + 3U; \
  220. } \
  221. \
  222. cur_absmin = *pSrc++; \
  223. cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
  224. if (cur_absmin < out) \
  225. { \
  226. out = cur_absmin; \
  227. outIndex = index + 4U; \
  228. } \
  229. \
  230. index += 4U; \
  231. \
  232. /* Decrement loop counter */ \
  233. blkCnt--; \
  234. } \
  235. \
  236. /* Loop unrolling: Compute remaining outputs */ \
  237. blkCnt = (blockSize - 1U) % 4U; \
  238. \
  239. \
  240. while (blkCnt > 0U) \
  241. { \
  242. cur_absmin = *pSrc++; \
  243. cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \
  244. if (cur_absmin < out) \
  245. { \
  246. out = cur_absmin; \
  247. outIndex = blockSize - blkCnt; \
  248. } \
  249. \
  250. /* Decrement loop counter */ \
  251. blkCnt--; \
  252. } \
  253. \
  254. /* Store the extrema value and it's index into destination pointers */ \
  255. *pResult = out; \
  256. *pIndex = outIndex;
  257. }
  258. #else
  259. void arm_absmin_q7(
  260. const q7_t * pSrc,
  261. uint32_t blockSize,
  262. q7_t * pResult,
  263. uint32_t * pIndex)
  264. {
  265. q7_t minVal, out; /* Temporary variables to store the output value. */
  266. uint32_t blkCnt, outIndex; /* Loop counter */
  267. /* Initialise index value to zero. */
  268. outIndex = 0U;
  269. /* Load first input value that act as reference value for comparision */
  270. out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
  271. pSrc++;
  272. /* Initialize blkCnt with number of samples */
  273. blkCnt = (blockSize - 1U);
  274. while (blkCnt > 0U)
  275. {
  276. /* Initialize minVal to the next consecutive values one by one */
  277. minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc);
  278. pSrc++;
  279. /* compare for the minimum value */
  280. if (out > minVal)
  281. {
  282. /* Update the minimum value and it's index */
  283. out = minVal;
  284. outIndex = blockSize - blkCnt;
  285. }
  286. /* Decrement loop counter */
  287. blkCnt--;
  288. }
  289. /* Store the minimum value and it's index into destination pointers */
  290. *pResult = out;
  291. *pIndex = outIndex;
  292. }
  293. #endif /* defined(ARM_MATH_DSP) */
  294. #endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
  295. /**
  296. @} end of AbsMin group
  297. */