synthesis-dct8.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. /******************************************************************************
  2. *
  3. * Copyright (C) 2014 The Android Open Source Project
  4. * Copyright 2003 - 2004 Open Interface North America, Inc. All rights reserved.
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at:
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. ******************************************************************************/
  19. /**********************************************************************************
  20. $Revision: #1 $
  21. ***********************************************************************************/
  22. /** @file
  23. @ingroup codec_internal
  24. */
  25. /**@addgroup codec_internal*/
  26. /**@{*/
  27. /*
  28. * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
  29. * factorization. The scaling factors are folded into the windowing
  30. * constants. 29 adds and 5 16x32 multiplies per 8 samples.
  31. */
  32. #include "common/bt_target.h"
  33. #include "oi_codec_sbc_private.h"
  34. #if (defined(SBC_DEC_INCLUDED) && SBC_DEC_INCLUDED == TRUE)
/*
 * Fixed-point multiplier constants passed as the K argument of FIX_MULT_DCT
 * in dct2_8(). Each is stored in S1.30 format, i.e. the integer is the real
 * value multiplied by 2^30; the trailing comment on each line repeats the
 * integer and gives the approximate real value it represents.
 */
#define AAN_C4_FIX (759250125)/* S1.30 759250125 0.707107*/
#define AAN_C6_FIX (410903207)/* S1.30 410903207 0.382683*/
#define AAN_Q0_FIX (581104888)/* S1.30 581104888 0.541196*/
#define AAN_Q1_FIX (1402911301)/* S1.30 1402911301 1.306563*/
/** Scales x by y bits to the right, adding a rounding factor.
 *
 * The rounding factor is 1 << (y - 1), i.e. half of the divisor 2^y, so y
 * must be at least 1: with y == 0 the macro would shift by a negative count.
 * The argument y is expanded twice, so it should be free of side effects.
 * The definition is skipped when SCALE has already been defined elsewhere.
 */
#ifndef SCALE
#define SCALE(x, y) (((x) + (1 <<((y)-1))) >> (y))
#endif
  44. /**
  45. * Default C language implementation of a 32x32->32 multiply. This function may
  46. * be replaced by a platform-specific version for speed.
  47. *
  48. * @param u A signed 32-bit multiplicand
  49. * @param v A signed 32-bit multiplier
  50. * @return A signed 32-bit value corresponding to the 32 most significant bits
  51. * of the 64-bit product of u and v.
  52. */
  53. static INLINE OI_INT32 default_mul_32s_32s_hi(OI_INT32 u, OI_INT32 v)
  54. {
  55. OI_UINT32 u0, v0;
  56. OI_INT32 u1, v1, w1, w2, t;
  57. u0 = u & 0xFFFF; u1 = u >> 16;
  58. v0 = v & 0xFFFF; v1 = v >> 16;
  59. t = u0 * v0;
  60. t = u1 * v0 + ((OI_UINT32)t >> 16);
  61. w1 = t & 0xFFFF;
  62. w2 = t >> 16;
  63. w1 = u0 * v1 + w1;
  64. return u1 * v1 + w2 + (w1 >> 16);
  65. }
  66. #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
  67. #ifdef DEBUG_DCT
  68. PRIVATE void float_dct2_8(float *RESTRICT out, OI_INT32 const *RESTRICT in)
  69. {
  70. #define FIX(x,bits) (((int)floor(0.5f+((x)*((float)(1<<bits)))))/((float)(1<<bits)))
  71. #define FLOAT_BUTTERFLY(x,y) x += y; y = x - (y*2); OI_ASSERT(VALID_INT32(x)); OI_ASSERT(VALID_INT32(y));
  72. #define FLOAT_MULT_DCT(K, sample) (FIX(K,20) * sample)
  73. #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))
  74. double L00, L01, L02, L03, L04, L05, L06, L07;
  75. double L25;
  76. double in0, in1, in2, in3;
  77. double in4, in5, in6, in7;
  78. in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in0));
  79. in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in1));
  80. in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in2));
  81. in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in3));
  82. in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in4));
  83. in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in5));
  84. in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in6));
  85. in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); OI_ASSERT(VALID_INT32(in7));
  86. L00 = (in0 + in7); OI_ASSERT(VALID_INT32(L00));
  87. L01 = (in1 + in6); OI_ASSERT(VALID_INT32(L01));
  88. L02 = (in2 + in5); OI_ASSERT(VALID_INT32(L02));
  89. L03 = (in3 + in4); OI_ASSERT(VALID_INT32(L03));
  90. L04 = (in3 - in4); OI_ASSERT(VALID_INT32(L04));
  91. L05 = (in2 - in5); OI_ASSERT(VALID_INT32(L05));
  92. L06 = (in1 - in6); OI_ASSERT(VALID_INT32(L06));
  93. L07 = (in0 - in7); OI_ASSERT(VALID_INT32(L07));
  94. FLOAT_BUTTERFLY(L00, L03);
  95. FLOAT_BUTTERFLY(L01, L02);
  96. L02 += L03; OI_ASSERT(VALID_INT32(L02));
  97. L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); OI_ASSERT(VALID_INT32(L02));
  98. FLOAT_BUTTERFLY(L00, L01);
  99. out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); OI_ASSERT(VALID_INT16(out[0]));
  100. out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); OI_ASSERT(VALID_INT16(out[4]));
  101. FLOAT_BUTTERFLY(L03, L02);
  102. out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); OI_ASSERT(VALID_INT16(out[6]));
  103. out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); OI_ASSERT(VALID_INT16(out[2]));
  104. L04 += L05; OI_ASSERT(VALID_INT32(L04));
  105. L05 += L06; OI_ASSERT(VALID_INT32(L05));
  106. L06 += L07; OI_ASSERT(VALID_INT32(L06));
  107. L04 /= 2;
  108. L05 /= 2;
  109. L06 /= 2;
  110. L07 /= 2;
  111. L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); OI_ASSERT(VALID_INT32(L05));
  112. L25 = L06 - L04; OI_ASSERT(VALID_INT32(L25));
  113. L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); OI_ASSERT(VALID_INT32(L25));
  114. L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); OI_ASSERT(VALID_INT32(L04));
  115. L04 -= L25; OI_ASSERT(VALID_INT32(L04));
  116. L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); OI_ASSERT(VALID_INT32(L06));
  117. L06 -= L25; OI_ASSERT(VALID_INT32(L25));
  118. FLOAT_BUTTERFLY(L07, L05);
  119. FLOAT_BUTTERFLY(L05, L04);
  120. out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1)); OI_ASSERT(VALID_INT16(out[3]));
  121. out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1)); OI_ASSERT(VALID_INT16(out[5]));
  122. FLOAT_BUTTERFLY(L07, L06);
  123. out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1)); OI_ASSERT(VALID_INT16(out[7]));
  124. out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1)); OI_ASSERT(VALID_INT16(out[1]));
  125. }
  126. #undef BUTTERFLY
  127. #endif
  128. /*
  129. * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
  130. * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
  131. * (1244918057 integer). The function it computes is an approximation to the array defined
  132. * by:
  133. *
  134. * diag(aan_s) * AAN= C2
  135. *
  136. * or
  137. *
  138. * AAN = diag(1/aan_s) * C2
  139. *
  140. * where C2 is as it is defined in the comment at the head of this file, and
  141. *
  142. * aan_s[i] = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1;
  143. *
  144. * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ]
  145. *
  146. * The output ranges are shown as follows:
  147. *
  148. * Let Y[0..7] = AAN * X[0..7]
  149. *
  150. * Without loss of generality, assume the input vector X consists of elements
  151. * between -1 and 1. The maximum possible value of a given output element occurs
  152. * with some particular combination of input vector elements each of which is -1
  153. * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[t]. Y[i] is
  154. * maximized if the sign of each X[t] matches the sign of AAN[t,i], ensuring a
  155. * positive contribution to the sum. Equivalently, one may simply sum
  156. * abs(AAN)[t,i] over t to get the maximum possible value of Y[i].
  157. *
  158. * This yields approximately [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00]
  159. *
  160. * Given the maximum magnitude sensible input value of +/-37992, this yields the
  161. * following vector of maximum output magnitudes:
  162. *
  163. * [ 303936 381820 367003 323692 303936 216555 151968 75984 ]
  164. *
  165. * Ultimately, these values must fit into 16 bit signed integers, so they must
  166. * be scaled. A non-uniform scaling helps maximize the kept precision. The
  167. * relative number of extra bits of precision maintainable with respect to the
  168. * largest value is given here:
  169. *
  170. * [ 0 0 0 0 0 0 1 2 ]
  171. *
  172. */
  173. PRIVATE void dct2_8(SBC_BUFFER_T *RESTRICT out, OI_INT32 const *RESTRICT in)
  174. {
  175. #define BUTTERFLY(x,y) x += y; y = x - (y<<1);
  176. #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K,x)<<2)
  177. OI_INT32 L00, L01, L02, L03, L04, L05, L06, L07;
  178. OI_INT32 L25;
  179. OI_INT32 in0, in1, in2, in3;
  180. OI_INT32 in4, in5, in6, in7;
  181. #if DCTII_8_SHIFT_IN != 0
  182. in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
  183. in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
  184. in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
  185. in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
  186. in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
  187. in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
  188. in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
  189. in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
  190. #else
  191. in0 = in[0];
  192. in1 = in[1];
  193. in2 = in[2];
  194. in3 = in[3];
  195. in4 = in[4];
  196. in5 = in[5];
  197. in6 = in[6];
  198. in7 = in[7];
  199. #endif
  200. L00 = in0 + in7;
  201. L01 = in1 + in6;
  202. L02 = in2 + in5;
  203. L03 = in3 + in4;
  204. L04 = in3 - in4;
  205. L05 = in2 - in5;
  206. L06 = in1 - in6;
  207. L07 = in0 - in7;
  208. BUTTERFLY(L00, L03);
  209. BUTTERFLY(L01, L02);
  210. L02 += L03;
  211. L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
  212. BUTTERFLY(L00, L01);
  213. out[0] = (OI_INT16)SCALE(L00, DCTII_8_SHIFT_0);
  214. out[4] = (OI_INT16)SCALE(L01, DCTII_8_SHIFT_4);
  215. BUTTERFLY(L03, L02);
  216. out[6] = (OI_INT16)SCALE(L02, DCTII_8_SHIFT_6);
  217. out[2] = (OI_INT16)SCALE(L03, DCTII_8_SHIFT_2);
  218. L04 += L05;
  219. L05 += L06;
  220. L06 += L07;
  221. L04 /= 2;
  222. L05 /= 2;
  223. L06 /= 2;
  224. L07 /= 2;
  225. L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
  226. L25 = L06 - L04;
  227. L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
  228. L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
  229. L04 -= L25;
  230. L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
  231. L06 -= L25;
  232. BUTTERFLY(L07, L05);
  233. BUTTERFLY(L05, L04);
  234. out[3] = (OI_INT16)SCALE(L04, DCTII_8_SHIFT_3 - 1);
  235. out[5] = (OI_INT16)SCALE(L05, DCTII_8_SHIFT_5 - 1);
  236. BUTTERFLY(L07, L06);
  237. out[7] = (OI_INT16)SCALE(L06, DCTII_8_SHIFT_7 - 1);
  238. out[1] = (OI_INT16)SCALE(L07, DCTII_8_SHIFT_1 - 1);
  239. #undef BUTTERFLY
  240. #ifdef DEBUG_DCT
  241. {
  242. float float_out[8];
  243. float_dct2_8(float_out, in);
  244. }
  245. #endif
  246. }
  247. /**@}*/
  248. #endif /* #if (defined(SBC_DEC_INCLUDED) && SBC_DEC_INCLUDED == TRUE) */