arm_float_to_q7.c 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_float_to_q7.c
  4. * Description: Converts the elements of the floating-point vector to Q7 vector
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/support_functions.h"
  29. /**
  30. @ingroup groupSupport
  31. */
  32. /**
  33. @addtogroup float_to_x
  34. @{
  35. */
  36. /**
  37. * @brief Converts the elements of the floating-point vector to Q7 vector.
  38. * @param[in] *pSrc points to the floating-point input vector
  39. * @param[out] *pDst points to the Q7 output vector
  40. * @param[in] blockSize length of the input vector
  41. * @return none.
  42. *
  43. *\par Description:
  44. * \par
  45. * The equation used for the conversion process is:
  46. * <pre>
  47. * pDst[n] = (q7_t)(pSrc[n] * 128); 0 <= n < blockSize.
  48. * </pre>
  49. * \par Scaling and Overflow Behavior:
  50. * \par
  51. * The function uses saturating arithmetic.
  52. * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
  53. * \note
 * In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
  55. * defined in the preprocessor section of project options.
  56. */
  57. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  58. void arm_float_to_q7(
  59. const float32_t * pSrc,
  60. q7_t * pDst,
  61. uint32_t blockSize)
  62. {
  63. uint32_t blkCnt; /* loop counters */
  64. float32_t maxQ = powf(2.0, 7);
  65. f32x4x4_t tmp;
  66. q15x8_t evVec, oddVec;
  67. q7x16_t vecDst;
  68. float32_t const *pSrcVec;
  69. #ifdef ARM_MATH_ROUNDING
  70. float32_t in;
  71. #endif
  72. pSrcVec = (float32_t const *) pSrc;
  73. blkCnt = blockSize >> 4;
  74. while (blkCnt > 0U) {
  75. tmp = vld4q(pSrcVec);
  76. pSrcVec += 16;
  77. /*
  78. * C = A * 128.0
  79. * convert from float to q7 and then store the results in the destination buffer
  80. */
  81. tmp.val[0] = vmulq(tmp.val[0], maxQ);
  82. tmp.val[1] = vmulq(tmp.val[1], maxQ);
  83. tmp.val[2] = vmulq(tmp.val[2], maxQ);
  84. tmp.val[3] = vmulq(tmp.val[3], maxQ);
  85. /*
  86. * convert and pack evens
  87. */
  88. evVec = vqmovnbq(evVec, vcvtaq_s32_f32(tmp.val[0]));
  89. evVec = vqmovntq(evVec, vcvtaq_s32_f32(tmp.val[2]));
  90. /*
  91. * convert and pack odds
  92. */
  93. oddVec = vqmovnbq(oddVec, vcvtaq_s32_f32(tmp.val[1]));
  94. oddVec = vqmovntq(oddVec, vcvtaq_s32_f32(tmp.val[3]));
  95. /*
  96. * merge
  97. */
  98. vecDst = vqmovnbq(vecDst, evVec);
  99. vecDst = vqmovntq(vecDst, oddVec);
  100. vst1q(pDst, vecDst);
  101. pDst += 16;
  102. /*
  103. * Decrement the blockSize loop counter
  104. */
  105. blkCnt--;
  106. }
  107. blkCnt = blockSize & 0xF;
  108. while (blkCnt > 0U)
  109. {
  110. /* C = A * 128 */
  111. /* Convert from float to q7 and store result in destination buffer */
  112. #ifdef ARM_MATH_ROUNDING
  113. in = (*pSrcVec++ * 128);
  114. in += in > 0.0f ? 0.5f : -0.5f;
  115. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  116. #else
  117. *pDst++ = (q7_t) __SSAT((q31_t) (*pSrcVec++ * 128.0f), 8);
  118. #endif /* #ifdef ARM_MATH_ROUNDING */
  119. /* Decrement loop counter */
  120. blkCnt--;
  121. }
  122. }
  123. #else
  124. #if defined(ARM_MATH_NEON)
  125. void arm_float_to_q7(
  126. const float32_t * pSrc,
  127. q7_t * pDst,
  128. uint32_t blockSize)
  129. {
  130. const float32_t *pIn = pSrc; /* Src pointer */
  131. uint32_t blkCnt; /* loop counter */
  132. float32x4_t inV;
  133. #ifdef ARM_MATH_ROUNDING
  134. float32_t in;
  135. float32x4_t zeroV = vdupq_n_f32(0.0f);
  136. float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
  137. float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
  138. float32x4_t r;
  139. uint32x4_t cmp;
  140. #endif
  141. int16x4_t cvt1,cvt2;
  142. int8x8_t outV;
  143. blkCnt = blockSize >> 3U;
  144. /* Compute 8 outputs at a time.
  145. ** a second loop below computes the remaining 1 to 7 samples. */
  146. while (blkCnt > 0U)
  147. {
  148. #ifdef ARM_MATH_ROUNDING
  149. /* C = A * 128 */
  150. /* Convert from float to q7 and then store the results in the destination buffer */
  151. inV = vld1q_f32(pIn);
  152. cmp = vcgtq_f32(inV,zeroV);
  153. r = vbslq_f32(cmp,pHalf,mHalf);
  154. inV = vaddq_f32(inV, r);
  155. cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
  156. pIn += 4;
  157. inV = vld1q_f32(pIn);
  158. cmp = vcgtq_f32(inV,zeroV);
  159. r = vbslq_f32(cmp,pHalf,mHalf);
  160. inV = vaddq_f32(inV, r);
  161. cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
  162. pIn += 4;
  163. outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
  164. vst1_s8(pDst, outV);
  165. pDst += 8;
  166. #else
  167. /* C = A * 128 */
  168. /* Convert from float to q7 and then store the results in the destination buffer */
  169. inV = vld1q_f32(pIn);
  170. cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
  171. pIn += 4;
  172. inV = vld1q_f32(pIn);
  173. cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
  174. pIn += 4;
  175. outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
  176. vst1_s8(pDst, outV);
  177. pDst += 8;
  178. #endif /* #ifdef ARM_MATH_ROUNDING */
  179. /* Decrement the loop counter */
  180. blkCnt--;
  181. }
  182. /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  183. ** No loop unrolling is used. */
  184. blkCnt = blockSize & 7;
  185. while (blkCnt > 0U)
  186. {
  187. #ifdef ARM_MATH_ROUNDING
  188. /* C = A * 128 */
  189. /* Convert from float to q7 and then store the results in the destination buffer */
  190. in = *pIn++;
  191. in = (in * 128);
  192. in += in > 0.0f ? 0.5f : -0.5f;
  193. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  194. #else
  195. /* C = A * 128 */
  196. /* Convert from float to q7 and then store the results in the destination buffer */
  197. *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  198. #endif /* #ifdef ARM_MATH_ROUNDING */
  199. /* Decrement the loop counter */
  200. blkCnt--;
  201. }
  202. }
  203. #else
  204. void arm_float_to_q7(
  205. const float32_t * pSrc,
  206. q7_t * pDst,
  207. uint32_t blockSize)
  208. {
  209. uint32_t blkCnt; /* Loop counter */
  210. const float32_t *pIn = pSrc; /* Source pointer */
  211. #ifdef ARM_MATH_ROUNDING
  212. float32_t in;
  213. #endif /* #ifdef ARM_MATH_ROUNDING */
  214. #if defined (ARM_MATH_LOOPUNROLL)
  215. /* Loop unrolling: Compute 4 outputs at a time */
  216. blkCnt = blockSize >> 2U;
  217. while (blkCnt > 0U)
  218. {
  219. /* C = A * 128 */
  220. /* Convert from float to q7 and store result in destination buffer */
  221. #ifdef ARM_MATH_ROUNDING
  222. in = (*pIn++ * 128);
  223. in += in > 0.0f ? 0.5f : -0.5f;
  224. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  225. in = (*pIn++ * 128);
  226. in += in > 0.0f ? 0.5f : -0.5f;
  227. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  228. in = (*pIn++ * 128);
  229. in += in > 0.0f ? 0.5f : -0.5f;
  230. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  231. in = (*pIn++ * 128);
  232. in += in > 0.0f ? 0.5f : -0.5f;
  233. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  234. #else
  235. *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  236. *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  237. *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  238. *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  239. #endif /* #ifdef ARM_MATH_ROUNDING */
  240. /* Decrement loop counter */
  241. blkCnt--;
  242. }
  243. /* Loop unrolling: Compute remaining outputs */
  244. blkCnt = blockSize % 0x4U;
  245. #else
  246. /* Initialize blkCnt with number of samples */
  247. blkCnt = blockSize;
  248. #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  249. while (blkCnt > 0U)
  250. {
  251. /* C = A * 128 */
  252. /* Convert from float to q7 and store result in destination buffer */
  253. #ifdef ARM_MATH_ROUNDING
  254. in = (*pIn++ * 128);
  255. in += in > 0.0f ? 0.5f : -0.5f;
  256. *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
  257. #else
  258. *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
  259. #endif /* #ifdef ARM_MATH_ROUNDING */
  260. /* Decrement loop counter */
  261. blkCnt--;
  262. }
  263. }
  264. #endif /* #if defined(ARM_MATH_NEON) */
  265. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
  266. /**
  267. @} end of float_to_x group
  268. */