arm_cfft_radix2_f16.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cfft_radix2_f16.c
  4. * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
  5. *
  6. * $Date: 23 April 2021
  7. * $Revision: V1.9.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/transform_functions_f16.h"
  29. #if defined(ARM_FLOAT16_SUPPORTED)
  30. void arm_radix2_butterfly_f16(
  31. float16_t * pSrc,
  32. uint32_t fftLen,
  33. const float16_t * pCoef,
  34. uint16_t twidCoefModifier);
  35. void arm_radix2_butterfly_inverse_f16(
  36. float16_t * pSrc,
  37. uint32_t fftLen,
  38. const float16_t * pCoef,
  39. uint16_t twidCoefModifier,
  40. float16_t onebyfftLen);
  41. extern void arm_bitreversal_f16(
  42. float16_t * pSrc,
  43. uint16_t fftSize,
  44. uint16_t bitRevFactor,
  45. const uint16_t * pBitRevTab);
  46. /**
  47. @ingroup groupTransforms
  48. */
  49. /**
  50. @addtogroup ComplexFFT
  51. @{
  52. */
  53. /**
  54. @brief Radix-2 CFFT/CIFFT.
  55. @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future
  56. @param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
  57. @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  58. @return none
  59. */
  60. void arm_cfft_radix2_f16(
  61. const arm_cfft_radix2_instance_f16 * S,
  62. float16_t * pSrc)
  63. {
  64. if (S->ifftFlag == 1U)
  65. {
  66. /* Complex IFFT radix-2 */
  67. arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle,
  68. S->twidCoefModifier, S->onebyfftLen);
  69. }
  70. else
  71. {
  72. /* Complex FFT radix-2 */
  73. arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle,
  74. S->twidCoefModifier);
  75. }
  76. if (S->bitReverseFlag == 1U)
  77. {
  78. /* Bit Reversal */
  79. arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
  80. }
  81. }
  82. /**
  83. @} end of ComplexFFT group
  84. */
  85. /* ----------------------------------------------------------------------
  86. ** Internal helper function used by the FFTs
  87. ** ------------------------------------------------------------------- */
  88. /*
  89. * @brief Core function for the floating-point CFFT butterfly process.
  90. * @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
  91. * @param[in] fftLen length of the FFT.
  92. * @param[in] *pCoef points to the twiddle coefficient buffer.
  93. * @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
  94. * @return none.
  95. */
  96. void arm_radix2_butterfly_f16(
  97. float16_t * pSrc,
  98. uint32_t fftLen,
  99. const float16_t * pCoef,
  100. uint16_t twidCoefModifier)
  101. {
  102. uint32_t i, j, k, l;
  103. uint32_t n1, n2, ia;
  104. float16_t xt, yt, cosVal, sinVal;
  105. float16_t p0, p1, p2, p3;
  106. float16_t a0, a1;
  107. #if defined (ARM_MATH_DSP)
  108. /* Initializations for the first stage */
  109. n2 = fftLen >> 1;
  110. ia = 0;
  111. i = 0;
  112. // loop for groups
  113. for (k = n2; k > 0; k--)
  114. {
  115. cosVal = pCoef[ia * 2];
  116. sinVal = pCoef[(ia * 2) + 1];
  117. /* Twiddle coefficients index modifier */
  118. ia += twidCoefModifier;
  119. /* index calculation for the input as, */
  120. /* pSrc[i + 0], pSrc[i + fftLen/1] */
  121. l = i + n2;
  122. /* Butterfly implementation */
  123. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  124. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  125. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  126. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  127. p0 = (_Float16)xt * (_Float16)cosVal;
  128. p1 = (_Float16)yt * (_Float16)sinVal;
  129. p2 = (_Float16)yt * (_Float16)cosVal;
  130. p3 = (_Float16)xt * (_Float16)sinVal;
  131. pSrc[2 * i] = a0;
  132. pSrc[2 * i + 1] = a1;
  133. pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
  134. pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
  135. i++;
  136. } // groups loop end
  137. twidCoefModifier <<= 1U;
  138. // loop for stage
  139. for (k = n2; k > 2; k = k >> 1)
  140. {
  141. n1 = n2;
  142. n2 = n2 >> 1;
  143. ia = 0;
  144. // loop for groups
  145. j = 0;
  146. do
  147. {
  148. cosVal = pCoef[ia * 2];
  149. sinVal = pCoef[(ia * 2) + 1];
  150. ia += twidCoefModifier;
  151. // loop for butterfly
  152. i = j;
  153. do
  154. {
  155. l = i + n2;
  156. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  157. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  158. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  159. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  160. p0 = (_Float16)xt * (_Float16)cosVal;
  161. p1 = (_Float16)yt * (_Float16)sinVal;
  162. p2 = (_Float16)yt * (_Float16)cosVal;
  163. p3 = (_Float16)xt * (_Float16)sinVal;
  164. pSrc[2 * i] = a0;
  165. pSrc[2 * i + 1] = a1;
  166. pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
  167. pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
  168. i += n1;
  169. } while ( i < fftLen ); // butterfly loop end
  170. j++;
  171. } while ( j < n2); // groups loop end
  172. twidCoefModifier <<= 1U;
  173. } // stages loop end
  174. // loop for butterfly
  175. for (i = 0; i < fftLen; i += 2)
  176. {
  177. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
  178. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
  179. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
  180. a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
  181. pSrc[2 * i] = a0;
  182. pSrc[2 * i + 1] = a1;
  183. pSrc[2 * i + 2] = xt;
  184. pSrc[2 * i + 3] = yt;
  185. } // groups loop end
  186. #else
  187. n2 = fftLen;
  188. // loop for stage
  189. for (k = fftLen; k > 1; k = k >> 1)
  190. {
  191. n1 = n2;
  192. n2 = n2 >> 1;
  193. ia = 0;
  194. // loop for groups
  195. j = 0;
  196. do
  197. {
  198. cosVal = pCoef[ia * 2];
  199. sinVal = pCoef[(ia * 2) + 1];
  200. ia += twidCoefModifier;
  201. // loop for butterfly
  202. i = j;
  203. do
  204. {
  205. l = i + n2;
  206. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  207. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  208. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  209. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  210. p0 = (_Float16)xt * (_Float16)cosVal;
  211. p1 = (_Float16)yt * (_Float16)sinVal;
  212. p2 = (_Float16)yt * (_Float16)cosVal;
  213. p3 = (_Float16)xt * (_Float16)sinVal;
  214. pSrc[2 * i] = a0;
  215. pSrc[2 * i + 1] = a1;
  216. pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
  217. pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
  218. i += n1;
  219. } while (i < fftLen);
  220. j++;
  221. } while (j < n2);
  222. twidCoefModifier <<= 1U;
  223. }
  224. #endif // #if defined (ARM_MATH_DSP)
  225. }
  226. void arm_radix2_butterfly_inverse_f16(
  227. float16_t * pSrc,
  228. uint32_t fftLen,
  229. const float16_t * pCoef,
  230. uint16_t twidCoefModifier,
  231. float16_t onebyfftLen)
  232. {
  233. uint32_t i, j, k, l;
  234. uint32_t n1, n2, ia;
  235. float16_t xt, yt, cosVal, sinVal;
  236. float16_t p0, p1, p2, p3;
  237. float16_t a0, a1;
  238. #if defined (ARM_MATH_DSP)
  239. n2 = fftLen >> 1;
  240. ia = 0;
  241. // loop for groups
  242. for (i = 0; i < n2; i++)
  243. {
  244. cosVal = pCoef[ia * 2];
  245. sinVal = pCoef[(ia * 2) + 1];
  246. ia += twidCoefModifier;
  247. l = i + n2;
  248. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  249. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  250. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  251. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  252. p0 = (_Float16)xt * (_Float16)cosVal;
  253. p1 = (_Float16)yt * (_Float16)sinVal;
  254. p2 = (_Float16)yt * (_Float16)cosVal;
  255. p3 = (_Float16)xt * (_Float16)sinVal;
  256. pSrc[2 * i] = a0;
  257. pSrc[2 * i + 1] = a1;
  258. pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
  259. pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
  260. } // groups loop end
  261. twidCoefModifier <<= 1U;
  262. // loop for stage
  263. for (k = fftLen / 2; k > 2; k = k >> 1)
  264. {
  265. n1 = n2;
  266. n2 = n2 >> 1;
  267. ia = 0;
  268. // loop for groups
  269. j = 0;
  270. do
  271. {
  272. cosVal = pCoef[ia * 2];
  273. sinVal = pCoef[(ia * 2) + 1];
  274. ia += twidCoefModifier;
  275. // loop for butterfly
  276. i = j;
  277. do
  278. {
  279. l = i + n2;
  280. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  281. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  282. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  283. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  284. p0 = (_Float16)xt * (_Float16)cosVal;
  285. p1 = (_Float16)yt * (_Float16)sinVal;
  286. p2 = (_Float16)yt * (_Float16)cosVal;
  287. p3 = (_Float16)xt * (_Float16)sinVal;
  288. pSrc[2 * i] = a0;
  289. pSrc[2 * i + 1] = a1;
  290. pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
  291. pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
  292. i += n1;
  293. } while ( i < fftLen ); // butterfly loop end
  294. j++;
  295. } while (j < n2); // groups loop end
  296. twidCoefModifier <<= 1U;
  297. } // stages loop end
  298. // loop for butterfly
  299. for (i = 0; i < fftLen; i += 2)
  300. {
  301. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
  302. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
  303. a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
  304. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
  305. p0 = (_Float16)a0 * (_Float16)onebyfftLen;
  306. p2 = (_Float16)xt * (_Float16)onebyfftLen;
  307. p1 = (_Float16)a1 * (_Float16)onebyfftLen;
  308. p3 = (_Float16)yt * (_Float16)onebyfftLen;
  309. pSrc[2 * i] = p0;
  310. pSrc[2 * i + 1] = p1;
  311. pSrc[2 * i + 2] = p2;
  312. pSrc[2 * i + 3] = p3;
  313. } // butterfly loop end
  314. #else
  315. n2 = fftLen;
  316. // loop for stage
  317. for (k = fftLen; k > 2; k = k >> 1)
  318. {
  319. n1 = n2;
  320. n2 = n2 >> 1;
  321. ia = 0;
  322. // loop for groups
  323. j = 0;
  324. do
  325. {
  326. cosVal = pCoef[ia * 2];
  327. sinVal = pCoef[(ia * 2) + 1];
  328. ia = ia + twidCoefModifier;
  329. // loop for butterfly
  330. i = j;
  331. do
  332. {
  333. l = i + n2;
  334. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  335. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  336. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  337. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  338. p0 = (_Float16)xt * (_Float16)cosVal;
  339. p1 = (_Float16)yt * (_Float16)sinVal;
  340. p2 = (_Float16)yt * (_Float16)cosVal;
  341. p3 = (_Float16)xt * (_Float16)sinVal;
  342. pSrc[2 * i] = a0;
  343. pSrc[2 * i + 1] = a1;
  344. pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
  345. pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
  346. i += n1;
  347. } while ( i < fftLen ); // butterfly loop end
  348. j++;
  349. } while ( j < n2 ); // groups loop end
  350. twidCoefModifier = twidCoefModifier << 1U;
  351. } // stages loop end
  352. n1 = n2;
  353. n2 = n2 >> 1;
  354. // loop for butterfly
  355. for (i = 0; i < fftLen; i += n1)
  356. {
  357. l = i + n2;
  358. a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
  359. xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
  360. a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
  361. yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
  362. p0 = (_Float16)a0 * (_Float16)onebyfftLen;
  363. p2 = (_Float16)xt * (_Float16)onebyfftLen;
  364. p1 = (_Float16)a1 * (_Float16)onebyfftLen;
  365. p3 = (_Float16)yt * (_Float16)onebyfftLen;
  366. pSrc[2 * i] = p0;
  367. pSrc[2U * l] = p2;
  368. pSrc[2 * i + 1] = p1;
  369. pSrc[2U * l + 1U] = p3;
  370. } // butterfly loop end
  371. #endif // #if defined (ARM_MATH_DSP)
  372. }
  373. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */