arm_scale_q31.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_scale_q31.c
  4. * Description: Multiplies a Q31 vector by a scalar
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/basic_math_functions.h"
  29. /**
  30. @ingroup groupMath
  31. */
  32. /**
  33. @addtogroup BasicScale
  34. @{
  35. */
  36. /**
  37. @brief Multiplies a Q31 vector by a scalar.
  38. @param[in] pSrc points to the input vector
  39. @param[in] scaleFract fractional portion of the scale value
  40. @param[in] shift number of bits to shift the result by
  41. @param[out] pDst points to the output vector
  42. @param[in] blockSize number of samples in each vector
  43. @par Scaling and Overflow Behavior
  44. The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
  45. These are multiplied to yield a 2.62 intermediate result and this is shifted
  46. with saturation to 1.31 format.
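  47. The 2.62 product is first shifted right by 32 bits, which leaves a 2.30 value
  48. in a 32-bit word; the one-bit left shift that restores 1.31 format is folded
  49. into the shift argument, so a total shift of <code>shift + 1</code> is applied.
  50. The shift argument therefore acts on the 32-bit value, not on the 2.62 product.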
  51. */
  52. #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
  53. #include "arm_helium_utils.h"
  54. void arm_scale_q31(
  55. const q31_t * pSrc,
  56. q31_t scaleFract,
  57. int8_t shift,
  58. q31_t * pDst,
  59. uint32_t blockSize)
  60. {
  61. uint32_t blkCnt; /* loop counters */
  62. q31x4_t vecSrc;
  63. q31x4_t vecDst;
  64. /* Compute 4 outputs at a time */
  65. blkCnt = blockSize >> 2;
  66. while (blkCnt > 0U)
  67. {
  68. /*
  69. * C = A * scale
  70. * Scale the input and then store the result in the destination buffer.
  71. */
  72. vecSrc = vld1q(pSrc);
  73. vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
  74. vecDst = vqshlq_r(vecDst, shift + 1);
  75. vst1q(pDst, vecDst);
  76. /*
  77. * Decrement the blockSize loop counter
  78. */
  79. blkCnt--;
  80. /*
  81. * advance vector source and destination pointers
  82. */
  83. pSrc += 4;
  84. pDst += 4;
  85. }
  86. /*
  87. * tail
  88. */
  89. blkCnt = blockSize & 3;
  90. if (blkCnt > 0U)
  91. {
  92. mve_pred16_t p0 = vctp32q(blkCnt);
  93. vecSrc = vld1q(pSrc);
  94. vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
  95. vecDst = vqshlq_r(vecDst, shift + 1);
  96. vstrwq_p(pDst, vecDst, p0);
  97. }
  98. }
  99. #else
  100. void arm_scale_q31(
  101. const q31_t *pSrc,
  102. q31_t scaleFract,
  103. int8_t shift,
  104. q31_t *pDst,
  105. uint32_t blockSize)
  106. {
  107. uint32_t blkCnt; /* Loop counter */
  108. q31_t in, out; /* Temporary variables */
  109. int8_t kShift = shift + 1; /* Shift to apply after scaling */
  110. int8_t sign = (kShift & 0x80);
  111. #if defined (ARM_MATH_LOOPUNROLL)
  112. /* Loop unrolling: Compute 4 outputs at a time */
  113. blkCnt = blockSize >> 2U;
  114. if (sign == 0U)
  115. {
  116. while (blkCnt > 0U)
  117. {
  118. /* C = A * scale */
  119. /* Scale input and store result in destination buffer. */
  120. in = *pSrc++; /* read input from source */
  121. in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
  122. out = in << kShift; /* apply shifting */
  123. if (in != (out >> kShift)) /* saturate the result */
  124. out = 0x7FFFFFFF ^ (in >> 31);
  125. *pDst++ = out; /* Store result destination */
  126. in = *pSrc++;
  127. in = ((q63_t) in * scaleFract) >> 32;
  128. out = in << kShift;
  129. if (in != (out >> kShift))
  130. out = 0x7FFFFFFF ^ (in >> 31);
  131. *pDst++ = out;
  132. in = *pSrc++;
  133. in = ((q63_t) in * scaleFract) >> 32;
  134. out = in << kShift;
  135. if (in != (out >> kShift))
  136. out = 0x7FFFFFFF ^ (in >> 31);
  137. *pDst++ = out;
  138. in = *pSrc++;
  139. in = ((q63_t) in * scaleFract) >> 32;
  140. out = in << kShift;
  141. if (in != (out >> kShift))
  142. out = 0x7FFFFFFF ^ (in >> 31);
  143. *pDst++ = out;
  144. /* Decrement loop counter */
  145. blkCnt--;
  146. }
  147. }
  148. else
  149. {
  150. while (blkCnt > 0U)
  151. {
  152. /* C = A * scale */
  153. /* Scale input and store result in destination buffer. */
  154. in = *pSrc++; /* read four inputs from source */
  155. in = ((q63_t) in * scaleFract) >> 32; /* multiply input with scaler value */
  156. out = in >> -kShift; /* apply shifting */
  157. *pDst++ = out; /* Store result destination */
  158. in = *pSrc++;
  159. in = ((q63_t) in * scaleFract) >> 32;
  160. out = in >> -kShift;
  161. *pDst++ = out;
  162. in = *pSrc++;
  163. in = ((q63_t) in * scaleFract) >> 32;
  164. out = in >> -kShift;
  165. *pDst++ = out;
  166. in = *pSrc++;
  167. in = ((q63_t) in * scaleFract) >> 32;
  168. out = in >> -kShift;
  169. *pDst++ = out;
  170. /* Decrement loop counter */
  171. blkCnt--;
  172. }
  173. }
  174. /* Loop unrolling: Compute remaining outputs */
  175. blkCnt = blockSize % 0x4U;
  176. #else
  177. /* Initialize blkCnt with number of samples */
  178. blkCnt = blockSize;
  179. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  180. if (sign == 0U)
  181. {
  182. while (blkCnt > 0U)
  183. {
  184. /* C = A * scale */
  185. /* Scale input and store result in destination buffer. */
  186. in = *pSrc++;
  187. in = ((q63_t) in * scaleFract) >> 32;
  188. out = in << kShift;
  189. if (in != (out >> kShift))
  190. out = 0x7FFFFFFF ^ (in >> 31);
  191. *pDst++ = out;
  192. /* Decrement loop counter */
  193. blkCnt--;
  194. }
  195. }
  196. else
  197. {
  198. while (blkCnt > 0U)
  199. {
  200. /* C = A * scale */
  201. /* Scale input and store result in destination buffer. */
  202. in = *pSrc++;
  203. in = ((q63_t) in * scaleFract) >> 32;
  204. out = in >> -kShift;
  205. *pDst++ = out;
  206. /* Decrement loop counter */
  207. blkCnt--;
  208. }
  209. }
  210. }
  211. #endif /* defined(ARM_MATH_MVEI) */
  212. /**
  213. @} end of BasicScale group
  214. */