c6x-compat.h 41 KB


  1. /*
  2. * Copyright (c) 2006-2010 Tensilica Inc. ALL RIGHTS RESERVED.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. * THE SOFTWARE.
  21. */
  22. #ifndef __C6X_COMPAT__H
  23. #define __C6X_COMPAT__H
  24. /* Unimplemented functions _gmpy, _gmpy4, _xormpy, _lssub, _cmpy, _cmpyr,
  25. _cmpyr1, _ddotpl2r, _ddotph2r */
  26. typedef long long C6X_COMPAT_LONG40;
  27. #define _memd8(a) (*((double*)(a)))
  28. #define _memd8_const(a) (*((const double*)(a)))
  29. #define _amemd8(a) (*((double*)(a)))
  30. #define _amemd8_const(a) (*((const double*)(a)))
  31. #define _mem8(a) (*((unsigned long long*)(a)))
  32. #define _mem8_const(a) (*((const unsigned long long*)(a)))
  33. #define _mem4(a) (*((unsigned*)(a)))
  34. #define _mem4_const(a) (*((const unsigned*)(a)))
  35. #define _amem4_const(a) (*((const unsigned*)(a)))
  36. /* NOTE: To emulate a C6X properly you should define global variables
  37. for your Xtensa with these names. Some of the emulation routines
  38. will set these values. */
  39. extern int _carry;
  40. extern int _overflow;
  41. // Utility routines
  42. #define TESTBIT(x,n) (((x) >> (n)) & 1)
  43. #define NSA_BITS 32
  44. static inline unsigned int norm_shift_amt_U_and_non_U(int is_signed, int inp) {
  45. int j=0, k=0;
  46. int x=inp;
  47. if (is_signed) {
  48. /* Invert signed val if negative */
  49. x= TESTBIT(x,(NSA_BITS-1))? ~x: x;
  50. x= (x&1)|(x<<1); /* Shift up to return count-1 */
  51. if (x ==0)
  52. return NSA_BITS-1;
  53. }
  54. if (x ==0)
  55. return NSA_BITS;
  56. /* Now count leading zeros */
  57. for (j=0, k=NSA_BITS-1; k>=0; j++, k--) {
  58. if (TESTBIT(x,k))
  59. return j;
  60. }
  61. return NSA_BITS;
  62. }
  63. static inline long long
  64. orig_L40_set( long long L40_var1) {
  65. long long L40_var_out;
  66. L40_var_out = L40_var1 & 0x000000ffffffffffLL;
  67. if( L40_var1 & 0x8000000000LL)
  68. L40_var_out = L40_var_out | 0xffffff0000000000LL;
  69. return( L40_var_out);
  70. }
  71. static inline signed long long
  72. util_saturate_n_no_state(signed long long t, int n)
  73. {
  74. signed long long maxv, minv;
  75. maxv = (1LL << (n-1)) - 1;
  76. minv = (-1LL << (n-1));
  77. if (t > maxv) {
  78. t = maxv;
  79. } else if (t < minv) {
  80. t = minv;
  81. }
  82. return t;
  83. }
  84. static inline signed long long
  85. util_saturate_n_sgn(signed long long t, int n)
  86. {
  87. signed long long result;
  88. signed long long maxv, minv;
  89. maxv = (1LL << (n-1)) - 1;
  90. minv = (-1LL << (n-1));
  91. if (t > 0) {
  92. result = maxv;
  93. _overflow = 1;
  94. } else if (t < 0) {
  95. result = minv;
  96. _overflow = 1;
  97. } else {
  98. result = 0;
  99. }
  100. return result;
  101. }
  102. /* well-behaved signed shift right (left on negative) with
  103. saturation */
  104. static inline signed long long
  105. util_shift_right_saturate_n(signed long long t, int shval, int n)
  106. {
  107. /* n should be <= 62 */
  108. long long result;
  109. signed long long mask;
  110. int actual_shift = shval;
  111. long long shft = actual_shift > 0 ? actual_shift : -actual_shift;
  112. if (t == 0 || actual_shift == 0)
  113. return t;
  114. if (actual_shift >= n) {
  115. return (t < 0) ? -1 : 0;
  116. }
  117. if (actual_shift <= -n) {
  118. return util_saturate_n_sgn(t, n);
  119. }
  120. if (actual_shift > 0) {
  121. return t >> actual_shift;
  122. }
  123. /* actual_shift < 0. Check for saturation after shift. */
  124. mask = (-1LL << (n-shft-1));
  125. if (t > 0 && ((mask & t) != 0)) {
  126. return util_saturate_n_sgn(t, n);
  127. }
  128. if (t < 0 && ((mask & t) != mask)) {
  129. return util_saturate_n_sgn(t, n);
  130. }
  131. result = t << shft;
  132. return result;
  133. }
  134. /* Implemented c6x standard C compatibility functions (alphabetical
  135. order) */
  136. static inline int _abs(int src1) {
  137. if ((unsigned) src1 == (unsigned) 0x80000000) {
  138. return 0x7fffffff;
  139. }
  140. return abs(src1);
  141. }
  142. static inline int _abs2(int src1) {
  143. short s1[2],r[2];
  144. int result;
  145. *((int*)s1) = src1;
  146. if ((unsigned short) s1[1] == (unsigned short) 0x8000) r[1] = 0x7fff;
  147. else r[1] = abs(s1[1]);
  148. if ((unsigned short) s1[0] == (unsigned short) 0x8000) r[0] = 0x7fff;
  149. else r[0] = abs(s1[0]);
  150. result = *(int*)r;
  151. return result;
  152. }
  153. static inline int _add2(int src1, int src2) {
  154. short s1[2], s2[2], r[2];
  155. int result;
  156. *((int*)s1) = src1;
  157. *((int*)s2) = src2;
  158. r[0] = s1[0] + s2[0];
  159. r[1] = s1[1] + s2[1];
  160. result = *(int*)r;
  161. return result;
  162. }
  163. static inline int _add4(int src1, int src2) {
  164. char c1[4], c2[4], r[4];
  165. int result;
  166. *((int*)c1) = src1;
  167. *((int*)c2) = src2;
  168. r[0] = c1[0] + c2[0];
  169. r[1] = c1[1] + c2[1];
  170. r[2] = c1[2] + c2[2];
  171. r[3] = c1[3] + c2[3];
  172. result = *(int*)r;
  173. return result;
  174. }
  175. static inline long long _addsub(unsigned int src1, unsigned int src2)
  176. {
  177. int res_lo;
  178. int res_hi;
  179. res_hi = src1+src2;
  180. res_lo = src1-src2;
  181. return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ;
  182. }
  183. static inline long long _addsub2(unsigned int src1, unsigned int src2)
  184. {
  185. short s1[2], s2[2], ra[2], rs[2];
  186. int res_lo;
  187. int res_hi;
  188. *((int*)s1) = src1;
  189. *((int*)s2) = src2;
  190. ra[0] = s1[0] + s2[0];
  191. ra[1] = s1[1] + s2[1];
  192. rs[0] = s1[0] - s2[0];
  193. rs[1] = s1[1] - s2[1];
  194. res_hi = *(int*)ra;
  195. res_lo = *(int*)rs;
  196. return (((unsigned long long) res_hi) << 32) | ((unsigned int) res_lo) ;
  197. }
  198. static inline int _avg2(int src1, int src2) {
  199. int low = (((int)1 + (short) src1 + (short) src2) >> 1) & 0XFFFF;
  200. int high1 = src1 >> 16;
  201. int high2 = src2 >> 16;
  202. int high = ((high1 + high2 + 1) >> 1)<< 16;
  203. return high | low;
  204. }
  205. static inline unsigned int _avgu4(unsigned int src1, unsigned int src2) {
  206. unsigned int res0 = ((src1 & 0xFF) + (src2 & 0xFF) + 1) >> 1;
  207. unsigned int res1 = (((src1 & 0xFF00) >> 8) + ((src2 & 0xFF00) >> 8) + 1) >> 1;
  208. unsigned int res2 = (((src1 & 0xFF0000) >> 16) + ((src2 & 0xFF0000) >> 16) + 1) >> 1;
  209. unsigned int res3 = (((src1 & 0xFF000000) >> 24) + ((src2 & 0xFF000000) >> 24) + 1) >> 1;
  210. return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0;
  211. }
  212. static inline int TEN_popc (unsigned char b)
  213. {
  214. int i, result = 0;
  215. for (i = 0; i < 8; i++){
  216. if (b & 0x1)
  217. result++;
  218. b >>= 1;
  219. }
  220. return result;
  221. }
  222. static inline unsigned int _bitc4(unsigned int src1)
  223. {
  224. unsigned int res0 = TEN_popc(src1 & 0xFF);
  225. unsigned int res1 = TEN_popc((src1 & 0xFF00) >> 8);
  226. unsigned int res2 = TEN_popc((src1 & 0xFF0000) >> 16);
  227. unsigned int res3 = TEN_popc((src1 & 0xFF000000) >> 24);
  228. return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0;
  229. }
  230. static inline unsigned int _bitr(unsigned int src) {
  231. int i;
  232. unsigned r = 0;
  233. for (i = 0; i< 32; ++i) {
  234. r = r | (((src >> i) & 1)<<(31-i));
  235. }
  236. return r;
  237. }
  238. static inline unsigned int _clr(unsigned int src2, int csta, int cstb)
  239. {
  240. csta &= 0x1f;
  241. cstb &= 0x1f;
  242. if (csta > cstb)
  243. return src2;
  244. else {
  245. unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta;
  246. return src2 & (~mask);
  247. }
  248. }
  249. static inline unsigned int _clrr(unsigned int src2, int src1)
  250. {
  251. unsigned int csta = (src1 >> 5) & 0x1f;
  252. unsigned int cstb = src1 & 0x1f;
  253. if (csta > cstb)
  254. return src2;
  255. else {
  256. unsigned int mask = (((1 << (cstb - csta)) << 1) - 1) << csta;
  257. return src2 & (~mask);
  258. }
  259. }
  260. static inline int _cmpeq2(int src1, int src2) {
  261. short s1[2], s2[2];
  262. int r0, r1;
  263. int result;
  264. *((int*)s1) = src1;
  265. *((int*)s2) = src2;
  266. r0 = s1[0] == s2[0] ? 1 : 0;
  267. r1 = s1[1] == s2[1] ? 1 : 0;
  268. result = (r1 << 1) | r0;
  269. return result;
  270. }
  271. static inline int _cmpeq4(int src1, int src2) {
  272. char s1[4], s2[4];
  273. int r0, r1, r2, r3;
  274. int result;
  275. *((int*)s1) = src1;
  276. *((int*)s2) = src2;
  277. r0 = s1[0] == s2[0] ? 1 : 0;
  278. r1 = s1[1] == s2[1] ? 1 : 0;
  279. r2 = s1[2] == s2[2] ? 1 : 0;
  280. r3 = s1[3] == s2[3] ? 1 : 0;
  281. result = (r3 << 3) | (r2 << 2) | (r1 << 1) | r0;
  282. return result;
  283. }
  284. static inline int _cmpgt2(int src1, int src2) {
  285. short s1[2], s2[2];
  286. int r1, r0;
  287. int result;
  288. *((int*)s1) = src1;
  289. *((int*)s2) = src2;
  290. r0 = s1[0] > s2[0] ? 1 : 0;
  291. r1 = s1[1] > s2[1] ? 1 : 0;
  292. result = (r1<<1) | r0;
  293. return result;
  294. }
  295. static inline unsigned int _cmpgtu4(unsigned int src1, unsigned int src2) {
  296. unsigned int s1_0 = (src1 & 0xFF);
  297. unsigned int s1_1 = (src1 & 0xFF00) >> 8;
  298. unsigned int s1_2 = (src1 & 0xFF0000) >> 16;
  299. unsigned int s1_3 = (src1 & 0xFF000000) >> 24;
  300. unsigned int s2_0 = (src2 & 0xFF);
  301. unsigned int s2_1 = (src2 & 0xFF00) >> 8;
  302. unsigned int s2_2 = (src2 & 0xFF0000) >> 16;
  303. unsigned int s2_3 = (src2 & 0xFF000000) >> 24;
  304. unsigned int result = 0;
  305. if (s1_0 > s2_0)
  306. result |= 0x1;
  307. if (s1_1 > s2_1)
  308. result |= 0x2;
  309. if (s1_2 > s2_2)
  310. result |= 0x4;
  311. if (s1_3 > s2_3)
  312. result |= 0x8;
  313. return result;
  314. }
  315. static inline long long _ddotp4(unsigned int src1, unsigned int src2) {
  316. unsigned int res0, res1;
  317. short s1_0 = (src1 & 0xffff);
  318. short s1_1 = (src1 & 0xfff0000) >> 16;
  319. unsigned short s2_0 = (src2 & 0xff);
  320. unsigned short s2_1 = (src2 & 0xff00) >> 8;
  321. unsigned short s2_2 = (src2 & 0xff0000) >> 16;
  322. unsigned short s2_3 = (src2 & 0xff000000) >> 24;
  323. res0 = ((int)s1_0) * s2_0 + ((int)s1_1) * s2_1;
  324. res1 = ((int)s1_0) * s2_2 + ((int)s1_1) * s2_3;
  325. return (res1 << 16) | res0;
  326. }
  327. static inline long long _ddotph2(long long src1_o_src1_e, unsigned int src2)
  328. {
  329. unsigned int src1_o = src1_o_src1_e >> 32;
  330. unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF;
  331. short ls1_o = src1_o & 0XFFFF;
  332. short hs1_o = src1_o >> 16;
  333. // short ls1_e = src1_e & 0XFFFF;
  334. short hs1_e = src1_e >> 16;
  335. short ls2 = src2 & 0XFFFF;
  336. short hs2 = src2 >> 16;
  337. unsigned long long res_hi = ls2 * ls1_o + hs2 * hs1_o;
  338. unsigned int res_lo = ls1_o * hs2 + hs1_e * ls2;
  339. return (res_hi << 32) | res_lo;
  340. }
  341. static inline long long _ddotpl2(long long src1_o_src1_e, unsigned int src2)
  342. {
  343. unsigned int src1_o = src1_o_src1_e >> 32;
  344. unsigned int src1_e = src1_o_src1_e & 0xFFFFFFFF;
  345. short ls1_o = src1_o & 0XFFFF;
  346. // short hs1_o = src1_o >> 16;
  347. short ls1_e = src1_e & 0XFFFF;
  348. short hs1_e = src1_e >> 16;
  349. short ls2 = src2 & 0XFFFF;
  350. short hs2 = src2 >> 16;
  351. unsigned long long res_hi = ls2 * hs1_e + hs2 * ls1_o;
  352. unsigned res_lo = hs1_e * hs2 + ls1_e * ls2;
  353. return (res_hi << 32) | res_lo;
  354. }
  355. static inline unsigned int _deal(unsigned int src)
  356. {
  357. int i;
  358. unsigned short lo = 0, hi = 0;
  359. for (i = 0; i < 32; i+= 2) {
  360. lo >>= 1;
  361. lo |= (src & 0x1) << 15;
  362. src >>= 1;
  363. hi >>= 1;
  364. hi |= (src & 0x1) << 15;
  365. src >>= 1;
  366. }
  367. return (hi << 16) | lo;
  368. }
  369. static inline long long _dmv(unsigned int src1, unsigned int src2)
  370. {
  371. return (((long long) src1) << 32) | src2;
  372. }
  373. static inline int _dotpn2(int src1, int src2) {
  374. short int s1_h = src1>>16;
  375. short int s1_l = src1;
  376. short int s2_h = src2>>16;
  377. short int s2_l = src2;
  378. return s1_h * s2_h - s1_l * s2_l;
  379. }
  380. static inline int _dotp2(int src1, int src2) {
  381. short int s1_h = src1>>16;
  382. short int s1_l = src1;
  383. short int s2_h = src2>>16;
  384. short int s2_l = src2;
  385. return s1_h * s2_h + s1_l * s2_l;
  386. }
  387. static inline int _dotpnrsu2(int src1, unsigned int src2)
  388. {
  389. short ls1 = src1 & 0XFFFF;
  390. unsigned short ls2 = src2 & 0XFFFF;
  391. short hs1 = src1 >> 16;
  392. unsigned short hs2 = src2 >> 16;
  393. int result = (((long long) (int)(hs1 * hs2)) - ((long long) (int)(ls1 * ls2)) + (1 << 15)) >> 16;
  394. return result;
  395. }
  396. static inline int _dotprsu2(int src1, unsigned int src2) {
  397. short ls1 = src1 & 0XFFFF;
  398. unsigned short ls2 = (src2 & 0XFFFF);
  399. short hs1 = src1 >> 16;
  400. unsigned short hs2 = (src2 >> 16);
  401. int result = (((long long) (int) (ls1 * ls2)) + ((long long) (int) (hs1 * hs2)) + (1LL << 15)) >> 16;
  402. return result;
  403. }
  404. static inline int _dotpsu4(int src1, unsigned int src2) {
  405. int result;
  406. signed char s1_0 = (src1 & 0xff);
  407. signed char s1_1 = (src1 & 0xff00) >> 8;
  408. signed char s1_2 = (src1 & 0xff0000) >> 16;
  409. signed char s1_3 = (src1 & 0xff000000) >> 24;
  410. unsigned int s2_0 = (src2 & 0xff);
  411. unsigned int s2_1 = (src2 & 0xff00) >> 8;
  412. unsigned int s2_2 = (src2 & 0xff0000) >> 16;
  413. unsigned int s2_3 = (src2 & 0xff000000) >> 24;
  414. result = s1_0 * s2_0 + s1_1 * s2_1 + s1_2 * s2_2 + s1_3 * s2_3;
  415. return result;
  416. }
  417. static inline unsigned int _dotpu4(unsigned int src1, unsigned int src2) {
  418. unsigned char v1_0 = src1 & 0xff;
  419. unsigned char v1_1 = (src1>>8) & 0xff;
  420. unsigned char v1_2 = (src1>>16) & 0xff;
  421. unsigned char v1_3 = (src1>>24) & 0xff;
  422. unsigned char v2_0 = src2 & 0xff;
  423. unsigned char v2_1 = (src2>>8) & 0xff;
  424. unsigned char v2_2 = (src2>>16) & 0xff;
  425. unsigned char v2_3 = (src2>>24) & 0xff;
  426. unsigned v = v1_0 * v2_0 + v1_1 * v2_1 + v1_2 * v2_2 + v1_3 * v2_3;
  427. return v;
  428. }
  429. static inline long long _dpack2(unsigned int src1, unsigned int src2){
  430. unsigned short s1[2], s2[2];
  431. *((int*)s1) = src1;
  432. *((int*)s2) = src2;
  433. return ((unsigned long long) s1[1] << 48) | ((unsigned long long) s2[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[0]);
  434. }
  435. static inline long long _dpackx2(unsigned int src1, unsigned int src2){
  436. unsigned short s1[2], s2[2];
  437. *((int*)s1) = src1;
  438. *((int*)s2) = src2;
  439. return ((unsigned long long) s2[0] << 48) | ((unsigned long long) s1[1] << 32) | ((unsigned long long) s1[0] << 16) | ((unsigned long long) s2[1]);
  440. }
  441. static inline int _ext(int src2, unsigned int csta, unsigned int cstb)
  442. {
  443. return (src2 << csta) >> cstb;
  444. }
  445. static inline int _extr(int src2, int src1)
  446. {
  447. unsigned int csta = (src1 >> 5) & 0x1f;
  448. unsigned int cstb = src1 & 0x1f;
  449. return (src2 << csta) >> cstb;
  450. }
  451. static inline unsigned int _extu(unsigned int src2, unsigned int csta, unsigned int cstb)
  452. {
  453. return (src2 << csta) >> cstb;
  454. }
  455. static inline unsigned int _extur(unsigned int src2, int src1)
  456. {
  457. unsigned int csta = (src1 >> 5) & 0x1f;
  458. unsigned int cstb = src1 & 0x1f;
  459. return (src2 << csta) >> cstb;
  460. }
  461. static inline unsigned long long _hi(double src) {
  462. unsigned long long v;
  463. *(double*)&v = src;
  464. return v>>32;
  465. }
  466. static inline unsigned int _hill (long long src)
  467. {
  468. return (unsigned int) (src >> 32);
  469. }
  470. static inline double _itod(unsigned hi, unsigned lo) {
  471. double v;
  472. unsigned long long ll = ((((unsigned long long)(hi))<<32) | (unsigned long long)((unsigned)lo));
  473. *((unsigned long long *)&v) = ll;
  474. return v;
  475. }
  476. static inline long long _itoll(unsigned int src2, unsigned int src1)
  477. {
  478. return (((long long) src2) << 32) | src1;
  479. }
  480. static inline C6X_COMPAT_LONG40 _labs(C6X_COMPAT_LONG40 src2)
  481. {
  482. long long maxv = (1LL << (40 -1)) - 1;
  483. long long minv = (-1LL << (40 - 1));
  484. C6X_COMPAT_LONG40 lres = orig_L40_set(src2);
  485. lres = lres < 0 ? -lres : lres;
  486. if (lres > maxv) lres = maxv;
  487. else if (lres < minv) lres = minv;
  488. return lres;
  489. }
  490. static inline C6X_COMPAT_LONG40 _ldotp2(int src1, int src2) {
  491. return (C6X_COMPAT_LONG40) _dotp2(src1, src2);
  492. }
  493. static inline unsigned int _lmbd(unsigned int src1, unsigned int src2)
  494. {
  495. return norm_shift_amt_U_and_non_U(0,(((int) (src1 << 31)) >> 31) ^ (~src2));
  496. }
  497. static inline unsigned int _lnorm(C6X_COMPAT_LONG40 src2) {
  498. if (src2 == 0)
  499. return 39;
  500. else {
  501. int hi = (int)(src2 >> 32);
  502. int lo = (int)src2;
  503. long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32;
  504. temp = orig_L40_set(temp);
  505. if (temp == 0) return 0;
  506. int cnt = 0;
  507. while (((temp >> 39) & 1) == ((temp >> 38) & 1)) {
  508. temp <<= 1;
  509. cnt++;
  510. }
  511. return cnt;
  512. }
  513. }
  514. static inline unsigned long long _lo(double src) {
  515. unsigned long long v;
  516. *(double*)&v = src;
  517. return v;
  518. }
  519. static inline unsigned int _loll (long long src)
  520. {
  521. return (unsigned int) src;
  522. }
  523. static inline C6X_COMPAT_LONG40 _lsadd(int src1, C6X_COMPAT_LONG40 src2)
  524. {
  525. long long maxv = (1LL << (40 -1)) - 1;
  526. long long minv = (-1LL << (40 - 1));
  527. int hi = (int)(src2 >> 32);
  528. int lo = (int)src2;
  529. long long src2_int = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32;
  530. long long src2_int2 = orig_L40_set(src2_int);
  531. long long res = src1 + src2_int2;
  532. if (res > maxv) {
  533. res = maxv;
  534. _overflow = 1;
  535. }
  536. else if (res < minv) {
  537. res = minv;
  538. _overflow = 1;
  539. }
  540. long long res2 = orig_L40_set(res);
  541. res2 = (signed char)(res2 >> 32);
  542. C6X_COMPAT_LONG40 lres = (((C6X_COMPAT_LONG40) res2) << 32) | ((unsigned int)res);
  543. return lres;
  544. }
  545. static inline int _max2 (int src1, int src2) {
  546. short s1[2], s2[2], r[2];
  547. int result;
  548. *((int*)s1) = src1;
  549. *((int*)s2) = src2;
  550. r[0] = s1[0] > s2[0] ? s1[0] : s2[0];
  551. r[1] = s1[1] > s2[1] ? s1[1] : s2[1];
  552. result = *(int*)r;
  553. return result;
  554. }
  555. static inline unsigned int _maxu4(unsigned int src1, unsigned int src2) {
  556. unsigned int res0, res1, res2, res3;
  557. unsigned int s1_0 = res0 = (src1 & 0xFF);
  558. unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8;
  559. unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16;
  560. unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24;
  561. unsigned int s2_0 = (src2 & 0xFF);
  562. unsigned int s2_1 = (src2 & 0xFF00) >> 8;
  563. unsigned int s2_2 = (src2 & 0xFF0000) >> 16;
  564. unsigned int s2_3 = (src2 & 0xFF000000) >> 24;
  565. // unsigned int res = 0;
  566. if (s1_0 < s2_0)
  567. res0 = s2_0;
  568. if (s1_1 < s2_1)
  569. res1 = s2_1;
  570. if (s1_2 < s2_2)
  571. res2 = s2_2;
  572. if (s1_3 < s2_3)
  573. res3 = s2_3;
  574. return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0;
  575. }
  576. static inline int _min2(int src1, int src2) {
  577. short s1[2], s2[2], r[2];
  578. int result;
  579. *((int*)s1) = src1;
  580. *((int*)s2) = src2;
  581. r[0] = s1[0] < s2[0] ? s1[0] : s2[0];
  582. r[1] = s1[1] < s2[1] ? s1[1] : s2[1];
  583. result = *(int*)r;
  584. return result;
  585. }
  586. static inline unsigned int _minu4(unsigned int src1, unsigned int src2) {
  587. unsigned int res0, res1, res2, res3;
  588. unsigned int s1_0 = res0 = (src1 & 0xFF);
  589. unsigned int s1_1 = res1 = (src1 & 0xFF00) >> 8;
  590. unsigned int s1_2 = res2 = (src1 & 0xFF0000) >> 16;
  591. unsigned int s1_3 = res3 = (src1 & 0xFF000000) >> 24;
  592. unsigned int s2_0 = (src2 & 0xFF);
  593. unsigned int s2_1 = (src2 & 0xFF00) >> 8;
  594. unsigned int s2_2 = (src2 & 0xFF0000) >> 16;
  595. unsigned int s2_3 = (src2 & 0xFF000000) >> 24;
  596. // unsigned int res = 0;
  597. if (s1_0 > s2_0)
  598. res0 = s2_0;
  599. if (s1_1 > s2_1)
  600. res1 = s2_1;
  601. if (s1_2 > s2_2)
  602. res2 = s2_2;
  603. if (s1_3 > s2_3)
  604. res3 = s2_3;
  605. return (res3 << 24) | (res2 << 16) | (res1 << 8) | res0;
  606. }
  607. static inline int _mpy(int src1, int src2) {
  608. return (short) src1 * (short) src2;
  609. }
  610. static inline int _mpyh(int src1, int src2) {
  611. return (short) (src1 >> 16) * (short) (src2 >> 16);
  612. }
  613. static inline long long _mpyhill (int src1, int src2)
  614. {
  615. short s1 = src1 >> 16;
  616. return ((long long) src2) * s1;
  617. }
  618. static inline int _mpyhir(int src1, int src2)
  619. {
  620. short s1 = src1 >> 16;
  621. long long result = ((long long) src2) * s1 + (1 << 14);
  622. result >>= 15;
  623. return result;
  624. }
  625. static inline int _mpyhl(int src1, int src2) {
  626. return (short) (src1 >> 16) * (short) (src2);
  627. }
  628. static inline unsigned int _mpyhlu(unsigned int src1, unsigned int src2) {
  629. return (unsigned short) (src1 >> 16) * (unsigned short) (src2);
  630. }
  631. static inline int _mpyhslu(int src1, unsigned int src2) {
  632. return (short) (src1 >> 16) * (unsigned short) src2;
  633. }
  634. static inline int _mpyhsu(int src1, unsigned int src2) {
  635. return (short) (src1 >>16) * (unsigned short) (src2 >>16);
  636. }
  637. static inline unsigned int _mpyhu(unsigned int src1, unsigned int src2) {
  638. return (unsigned short) (src1 >>16) * (unsigned short) (src2 >> 16);
  639. }
  640. static inline int _mpyhuls(unsigned int src1, int src2) {
  641. return (unsigned short) (src1 >>16) * (signed short) (src2);
  642. }
  643. static inline int _mpyhus(unsigned int src1, int src2) {
  644. return (unsigned short) (src1 >> 16) * (short) (src2 >>16);
  645. }
  646. static inline long long _mpyidll (int src1, int src2)
  647. {
  648. return (long long) src1 * src2;
  649. }
  650. static inline int _mpylh(int src1, int src2) {
  651. return (signed short) (src1 & 0xffff) * (signed short) (src2 >> 16);
  652. }
  653. static inline unsigned int _mpylhu(unsigned int src1, unsigned int src2) {
  654. return (unsigned short) src1 * (unsigned short) (src2 >> 16);
  655. }
  656. static inline long long _mpylill (int src1, int src2)
  657. {
  658. return ((long long) src2) * ((short)src1);
  659. }
  660. static inline int _mpylir(int src1, int src2)
  661. {
  662. short s1 = src1;
  663. long long result = ((long long) src2) * s1 + (1 << 14);
  664. result >>= 15;
  665. return result;
  666. }
  667. static inline int _mpylshu(int src1, unsigned int src2) {
  668. return (short) src1 * (unsigned short) (src2 >> 16);
  669. }
  670. static inline int _mpyluhs(unsigned int src1, int src2) {
  671. return (unsigned short) src1 * (short) (src2 >> 16);
  672. }
  673. static inline int _mpysu(int src1, unsigned int src2) {
  674. return (short) src1 * (unsigned short) src2;
  675. }
  676. static inline long long _mpysu4ll (int src1, unsigned int src2) {
  677. unsigned short res0, res1, res2, res3;
  678. signed char s1_0 = (src1 & 0xff);
  679. signed char s1_1 = (src1 & 0xff00) >> 8;
  680. signed char s1_2 = (src1 & 0xff0000) >> 16;
  681. signed char s1_3 = (src1 & 0xff000000) >> 24;
  682. unsigned short s2_0 = (src2 & 0xff);
  683. unsigned short s2_1 = (src2 & 0xff00) >> 8;
  684. unsigned short s2_2 = (src2 & 0xff0000) >> 16;
  685. unsigned short s2_3 = (src2 & 0xff000000) >> 24;
  686. res0 = s1_0 * s2_0;
  687. res1 = s1_1 * s2_1;
  688. res2 = s1_2 * s2_2;
  689. res3 = s1_3 * s2_3;
  690. return (((unsigned long long) res3) << 48)
  691. | (((unsigned long long) res2) << 32)
  692. | (((unsigned long long) res1) << 16)
  693. | res0;
  694. }
  695. static inline unsigned int _mpyu(unsigned int src1, unsigned int src2) {
  696. unsigned v = (unsigned short)src1 * (unsigned short)src2;
  697. return v;
  698. }
  699. static inline int _mpyus(unsigned int src1, int src2) {
  700. return (unsigned short) src1 * (short) src2;
  701. }
  702. static inline long long _mpyu4ll (unsigned int src1, unsigned int src2) {
  703. unsigned short res0, res1, res2, res3;
  704. unsigned char s1_0 = (src1 & 0xff);
  705. unsigned char s1_1 = (src1 & 0xff00) >> 8;
  706. unsigned char s1_2 = (src1 & 0xff0000) >> 16;
  707. unsigned char s1_3 = (src1 & 0xff000000) >> 24;
  708. unsigned short s2_0 = (src2 & 0xff);
  709. unsigned short s2_1 = (src2 & 0xff00) >> 8;
  710. unsigned short s2_2 = (src2 & 0xff0000) >> 16;
  711. unsigned short s2_3 = (src2 & 0xff000000) >> 24;
  712. res0 = s1_0 * s2_0;
  713. res1 = s1_1 * s2_1;
  714. res2 = s1_2 * s2_2;
  715. res3 = s1_3 * s2_3;
  716. return (((unsigned long long) res3) << 48)
  717. | (((unsigned long long) res2) << 32)
  718. | (((unsigned long long) res1) << 16)
  719. | res0;
  720. }
  721. static inline long long _mpy2ir(unsigned int src1, unsigned int src2)
  722. {
  723. if ((src1 == 0x8000) && (src2 == 0x80000000)) {
  724. _overflow = 1;
  725. return 0;
  726. }
  727. else {
  728. short ls1 = src1 & 0xffff;
  729. short hs1 = src1 >> 16;
  730. unsigned long long hi = (((long long) hs1) * (int) src2 + (1 << 14)) >> 15;
  731. unsigned long long lo = ((((long long) ls1) * (int) src2 + (1 << 14)) >> 15) & 0xFFFFFFFF;
  732. return (hi << 32) | lo;
  733. }
  734. }
  735. static inline long long _mpy2ll (int src1, int src2) {
  736. short ls1 = src1 & 0xffff;
  737. short hs1 = src1 >> 16;
  738. short ls2 = src2 & 0xffff;
  739. short hs2 = src2 >> 16;
  740. unsigned long long hi = hs1 * hs2;
  741. unsigned long long lo = (ls1 * ls2) & 0xFFFFFFFF;
  742. return (hi << 32) | lo;
  743. }
  744. static inline int _mpy32(int src1, int src2)
  745. {
  746. return src1 * src2;
  747. }
  748. static inline long long _mpy32ll(int src1, int src2)
  749. {
  750. return ((long long) src1) * src2;
  751. }
  752. static inline long long _mpy32su(int src1, unsigned int src2)
  753. {
  754. return ((long long) src1) * ((int) src2);
  755. }
  756. static inline long long _mpy32u(unsigned int src1, unsigned int src2)
  757. {
  758. return ((long long) ((int) src1)) * ((long long) ((int) src2));
  759. }
  760. static inline long long _mpy32us(unsigned int src1, int src2)
  761. {
  762. return ((int) src1) * ((long long) src2);
  763. }
  764. static inline int _mvd (int src2)
  765. {
  766. return src2;
  767. }
  768. static inline unsigned int _norm(int src2)
  769. {
  770. return norm_shift_amt_U_and_non_U(1,src2);
  771. }
  772. static inline unsigned int _pack2 (unsigned int src1, unsigned int src2) {
  773. short s1[2], s2[2], r[2];
  774. int result;
  775. *((int*)s1) = src1;
  776. *((int*)s2) = src2;
  777. r[0] = s2[0];
  778. r[1] = s1[0];
  779. result = *(int*)r;
  780. return result;
  781. }
  782. static inline int _packh2 (unsigned int src1, unsigned int src2) {
  783. unsigned v0 = src1 & 0xffff0000;
  784. unsigned v1 = src2 >> 16;
  785. unsigned v = v0|v1;
  786. return v;
  787. }
  788. static inline unsigned int _packh4 (unsigned int src1, unsigned int src2) {
  789. unsigned v3 = (src1 >> 24) & 0xff;
  790. unsigned v2 = (src1 >> 8) & 0xff;
  791. unsigned v1 = (src2 >> 24) & 0xff;
  792. unsigned v0 = (src2 >> 8) & 0xff;
  793. unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0;
  794. return v;
  795. }
  796. static inline unsigned int _packhl2 (unsigned int src1, unsigned int src2) {
  797. unsigned v0 = src1 & 0xffff0000;
  798. unsigned v1 = src2 & 0x0000ffff;
  799. unsigned v = v0|v1;
  800. return v;
  801. }
  802. static inline unsigned int _packlh2 (unsigned int src1, unsigned int src2) {
  803. unsigned v0 = src1 << 16;
  804. unsigned v1 = (src2 >> 16) & 0xffff;
  805. unsigned v = v0|v1;
  806. return v;
  807. }
  808. static inline unsigned int _packl4 (unsigned int src1, unsigned int src2) {
  809. unsigned v3 = (src1 >> 16) & 0xff;
  810. unsigned v2 = (src1) & 0xff;
  811. unsigned v1 = (src2 >> 16) & 0xff;
  812. unsigned v0 = (src2) & 0xff;
  813. unsigned v = (v3<<24) | (v2<<16) | (v1 << 8) | v0;
  814. return v;
  815. }
  816. static inline unsigned int _rpack2 (unsigned int src1, unsigned int src2) {
  817. int s1 = (int) src1;
  818. int s2 = (int) src2;
  819. s1 = util_shift_right_saturate_n (s1, -1, 32);
  820. s2 = util_shift_right_saturate_n (s2, -1, 32);
  821. return (unsigned int) (s1 & 0xffff0000) | (unsigned int) ((s2 & 0xffff0000) >>16);
  822. }
  823. static inline unsigned int _rotl (unsigned int src1, unsigned int src2)
  824. {
  825. src2 &= 0x1f;
  826. return (src1 << src2) | (src1 >> (32 - src2));
  827. }
  828. static inline int _sadd(int src1, int src2) {
  829. signed long long res;
  830. signed long long maxv, minv;
  831. maxv = (1LL << (32-1)) - 1;
  832. minv = (-1LL << (32-1));
  833. res = (long long) src1 + (long long) src2;
  834. if (res > maxv) {
  835. res = maxv;
  836. _overflow = 1;
  837. }
  838. else if (res < minv ) {
  839. res = minv;
  840. _overflow = 1;
  841. }
  842. return (int) res;
  843. }
  844. static inline long long _saddsub(unsigned int src1, unsigned int src2) {
  845. int radd;
  846. signed long long rsub;
  847. signed long long maxv, minv;
  848. maxv = (1LL << (32-1)) - 1;
  849. minv = (-1LL << (32-1));
  850. radd = (int) src1 + (int) src2;
  851. // saturate on subtract, not add
  852. rsub = (long long) ((int) src1) - (long long) ((int) src2);
  853. if (rsub > maxv) {
  854. rsub = maxv;
  855. /* NOTE: TI c6x does NOT set the overflow register even if results saturate */
  856. /* _overflow = 1; */
  857. }
  858. else if (rsub < minv ) {
  859. rsub = minv;
  860. /* NOTE: TI c6x does NOT set the overflow register even if results saturate */
  861. /* _overflow = 1; */
  862. }
  863. return (((unsigned long long) radd) << 32) | ( rsub & 0x00000000ffffffff ) ;
  864. }
  /*
   * _saddsub2 - saturating add and subtract on both signed 16-bit halves.
   *
   * src1 and src2 each carry two signed 16-bit lanes (bits 15..0 and bits
   * 31..16).  For every lane the sum and the difference (src1 lane minus
   * src2 lane) are computed and clamped to the signed 16-bit range.
   *
   * Result layout, most significant first:
   *   bits 63..48  high-lane sum
   *   bits 47..32  low-lane sum
   *   bits 31..16  high-lane difference
   *   bits 15..0   low-lane difference
   *
   * _overflow is not modified.
   */
  static inline long long _saddsub2(unsigned int src1, unsigned int src2) {
    const int maxv = (1 << (16-1)) - 1;
    const int minv = -(1 << (16-1));
    int a[2], b[2];
    int radd[2], rsub[2];
    int lane;

    /* Lanes are extracted arithmetically (index 0 = low half, 1 = high half)
       rather than by writing an int through a short array, so the result
       does not depend on byte order or violate the aliasing rules.  The
       xor/subtract pair sign-extends a 16-bit field. */
    a[0] = (int) ((src1 & 0xffff) ^ 0x8000) - 0x8000;
    a[1] = (int) (((src1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
    b[0] = (int) ((src2 & 0xffff) ^ 0x8000) - 0x8000;
    b[1] = (int) (((src2 >> 16) & 0xffff) ^ 0x8000) - 0x8000;

    for (lane = 0; lane < 2; lane++) {
      radd[lane] = a[lane] + b[lane];
      rsub[lane] = a[lane] - b[lane];

      /* NOTE: TI c6x does NOT set the overflow register even if results saturate */
      if (radd[lane] > maxv)
        radd[lane] = maxv;
      else if (radd[lane] < minv)
        radd[lane] = minv;

      if (rsub[lane] > maxv)
        rsub[lane] = maxv;
      else if (rsub[lane] < minv)
        rsub[lane] = minv;
    }

    return ((((unsigned long long) radd[1]) & 0x000000000000ffff) << 48) |
           ((((unsigned long long) radd[0]) & 0x000000000000ffff) << 32) |
           ((((unsigned long long) rsub[1]) & 0x000000000000ffff) << 16) |
           ((((unsigned long long) rsub[0]) & 0x000000000000ffff));
  }
  /*
   * _sadd2 - saturating addition of two pairs of signed 16-bit values.
   *
   * The low halves (bits 15..0) of src1 and src2 are added, as are the high
   * halves (bits 31..16).  Each sum is clamped to the signed 16-bit range
   * and the two results are packed back into the matching halves of the
   * return value.  _overflow is not modified.
   */
  static inline int _sadd2(int src1, int src2) {
    const int maxv = (1 << (16-1)) - 1;
    const int minv = -(1 << (16-1));
    const unsigned int u1 = (unsigned int) src1;
    const unsigned int u2 = (unsigned int) src2;
    unsigned int packed = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
      const unsigned int shift = 16u * (unsigned int) lane;
      /* Sign-extend each 16-bit field arithmetically; this avoids punning an
         int through a short array (aliasing and byte-order dependent). */
      int a = (int) (((u1 >> shift) & 0xffff) ^ 0x8000) - 0x8000;
      int b = (int) (((u2 >> shift) & 0xffff) ^ 0x8000) - 0x8000;
      int sum = a + b;

      /* NOTE: TI c6x does NOT set the overflow register even if results saturate */
      if (sum > maxv)
        sum = maxv;
      else if (sum < minv)
        sum = minv;

      /* Pack in unsigned arithmetic so the high lane never shifts into the
         sign bit of an int. */
      packed |= ((unsigned int) sum & 0xffff) << shift;
    }
    return (int) packed;
  }
  /*
   * _saddus2 - add signed 16-bit lanes to unsigned 16-bit lanes, saturating
   * to the unsigned 16-bit range.
   *
   * src1: two unsigned 16-bit lanes (bits 15..0 and 31..16).
   * src2: two signed 16-bit lanes in the same positions.
   *
   * Each lane sum is clamped to 0..0xffff and the two results are packed
   * into the matching halves of the return value.  _overflow is not
   * modified.
   */
  static inline int _saddus2(unsigned int src1, int src2) {
    const unsigned int u2 = (unsigned int) src2;
    unsigned int packed = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
      const unsigned int shift = 16u * (unsigned int) lane;
      int a = (int) ((src1 >> shift) & 0xffff);
      /* xor/subtract sign-extends the 16-bit field of src2 */
      int b = (int) (((u2 >> shift) & 0xffff) ^ 0x8000) - 0x8000;
      int sum = a + b;

      if (sum >= 0x10000)
        sum = 0xffff;
      else if (sum < 0)
        sum = 0;

      /* Unsigned packing: a high lane of 0x8000 or more must not be shifted
         into the sign bit of an int. */
      packed |= (unsigned int) sum << shift;
    }
    return (int) packed;
  }
  /*
   * _saddu4 - saturating addition of four packed unsigned bytes.
   *
   * Each byte of src1 is added to the byte of src2 in the same position; a
   * sum above 0xff is replaced by 0xff.  The four results are returned in
   * their original byte positions.
   */
  static inline unsigned int _saddu4(unsigned int src1, unsigned int src2) {
    unsigned int packed = 0;
    unsigned int shift;

    for (shift = 0; shift < 32; shift += 8) {
      unsigned int total = ((src1 >> shift) & 0xff) + ((src2 >> shift) & 0xff);
      if (total > 0xff)
        total = 0xff;
      packed |= total << shift;
    }
    return packed;
  }
  996. static inline int _sat(C6X_COMPAT_LONG40 src2)
  997. {
  998. long long maxv = (1LL << (32-1)) - 1;
  999. long long minv = (-1LL << (32-1));
  1000. int hi = (int)(src2 >> 32);
  1001. int lo = (int)src2;
  1002. long long temp = (unsigned long long)(unsigned)lo | (unsigned long long)hi << 32;
  1003. temp = orig_L40_set(temp);
  1004. if (temp > maxv) {
  1005. temp = maxv;
  1006. _overflow = 1;
  1007. }
  1008. else if (temp < minv) {
  1009. temp = minv;
  1010. _overflow = 1;
  1011. }
  1012. return (int) temp;
  1013. }
  /*
   * _set - set a contiguous bit field to all ones.
   *
   * src2: value to modify.
   * csta: index of the lowest bit of the field (low five bits used).
   * cstb: index of the highest bit of the field (low five bits used).
   *
   * Returns src2 with bits csta..cstb (inclusive) set.  If csta > cstb the
   * value is returned unchanged.
   */
  static inline unsigned int _set(unsigned int src2, unsigned int csta, unsigned int cstb)
  {
    unsigned int field;

    csta &= 0x1f;
    cstb &= 0x1f;
    if (csta > cstb)
      return src2;

    /* Built in unsigned arithmetic: with a signed 1 the shifts overflow int
       for fields that reach bit 31.  The shift is done in two steps so a
       full 32-bit field never shifts by the whole word width; the unsigned
       wrap to 0 followed by "- 1" then yields all ones. */
    field = ((1u << (cstb - csta)) << 1) - 1u;
    return src2 | (field << csta);
  }
  /*
   * _setr - set a contiguous bit field whose bounds are packed in a register.
   *
   * src2: value to modify.
   * src1: bits 9..5 give the lowest bit index of the field (csta), bits 4..0
   *       give the highest bit index (cstb).
   *
   * Returns src2 with bits csta..cstb (inclusive) set.  If csta > cstb the
   * value is returned unchanged.
   */
  static inline unsigned int _setr(unsigned int src2, int src1)
  {
    const unsigned int bounds = (unsigned int) src1;
    const unsigned int csta = (bounds >> 5) & 0x1f;
    const unsigned int cstb = bounds & 0x1f;
    unsigned int field;

    if (csta > cstb)
      return src2;

    /* Built in unsigned arithmetic: with a signed 1 the shifts overflow int
       for fields that reach bit 31.  The two-step shift keeps a full 32-bit
       field from shifting by the whole word width. */
    field = ((1u << (cstb - csta)) << 1) - 1u;
    return src2 | (field << csta);
  }
  /*
   * _shfl - interleave the bits of the two 16-bit halves of a word.
   *
   * Bit i of the low half of src2 lands in bit 2*i of the result and bit i
   * of the high half lands in bit 2*i + 1, for i = 0..15.
   */
  static inline unsigned int _shfl (unsigned int src2)
  {
    const unsigned int lo = src2 & 0xffff;
    const unsigned int hi = (src2 >> 16) & 0xffff;
    unsigned int result = 0;
    int i;

    /* Every bit is placed with unsigned shifts; shifting a signed 1 into
       bit 31 would be undefined behaviour. */
    for (i = 0; i < 16; i++) {
      result |= ((lo >> i) & 1u) << (2 * i);
      result |= ((hi >> i) & 1u) << (2 * i + 1);
    }
    return result;
  }
  /*
   * _shfl3 - interleave three 16-bit fields into a 48-bit result.
   *
   * The fields are the low half of src2, the low half of src1 and the high
   * half of src1.  For i = 0..15, result bit 3*i comes from bit i of src2's
   * low half, bit 3*i + 1 from bit i of src1's low half and bit 3*i + 2
   * from bit i of src1's high half.  The upper half of src2 is ignored.
   */
  static inline long long _shfl3 (unsigned int src1, unsigned int src2)
  {
    const unsigned short low_field = src2;
    const unsigned short mid_field = src1;
    const unsigned short high_field = src1 >> 16;
    unsigned long long interleaved = 0;
    int bit;

    for (bit = 0; bit < 16; bit++) {
      interleaved |= ((unsigned long long) ((low_field >> bit) & 0x1)) << (3 * bit);
      interleaved |= ((unsigned long long) ((mid_field >> bit) & 0x1)) << (3 * bit + 1);
      interleaved |= ((unsigned long long) ((high_field >> bit) & 0x1)) << (3 * bit + 2);
    }
    return interleaved;
  }
  /*
   * _shlmb - shift left and merge a byte.
   *
   * Returns src2 shifted left by one byte with the most significant byte of
   * src1 placed in the vacated low byte.
   */
  static inline unsigned int _shlmb (unsigned int src1, unsigned int src2)
  {
    const unsigned int incoming = src1 >> 24;
    const unsigned int shifted = src2 << 8;

    return shifted | incoming;
  }
  /*
   * _shrmb - shift right and merge a byte.
   *
   * Returns src2 shifted right by one byte with the least significant byte
   * of src1 placed in the vacated high byte.
   */
  static inline unsigned int _shrmb (unsigned int src1, unsigned int src2)
  {
    const unsigned int incoming = src1 << 24;
    const unsigned int shifted = src2 >> 8;

    return shifted | incoming;
  }
  /*
   * _shru2 - logical right shift of two packed unsigned 16-bit values.
   *
   * src1: two unsigned 16-bit lanes (bits 15..0 and 31..16).
   * src2: shift count; only its low five bits are used, matching _shr2.
   *
   * Each lane is shifted right independently with zero fill; counts of 16
   * or more clear both lanes.
   */
  static inline unsigned int _shru2 (unsigned int src1, unsigned int src2) {
    /* Masking keeps the shift below the word width (a larger count is
       undefined behaviour in C). */
    const unsigned int count = src2 & 31;
    const unsigned int hs1 = (src1 >> 16) & 0xFFFF;
    const unsigned int ls1 = src1 & 0xFFFF;

    /* Lanes are kept unsigned so repacking the high lane cannot shift into
       the sign bit of an int. */
    return ((hs1 >> count) << 16) | (ls1 >> count);
  }
  /*
   * _shr2 - arithmetic right shift of two packed signed 16-bit values.
   *
   * src1: two signed 16-bit lanes (bits 15..0 and 31..16).
   * src2: shift count; only its low five bits are used.
   *
   * Each lane is sign-extended, shifted right and truncated back to 16
   * bits, then stored in its original position.
   */
  static inline int _shr2 (int src1, unsigned int src2) {
    const unsigned int bits = (unsigned int) src1;
    /* Lanes are extracted arithmetically instead of punning an int through
       a short array, which violated the aliasing rules and depended on byte
       order.  The xor/subtract pair sign-extends a 16-bit field. */
    int lo = (int) ((bits & 0xffff) ^ 0x8000) - 0x8000;
    int hi = (int) (((bits >> 16) & 0xffff) ^ 0x8000) - 0x8000;

    src2 = src2 & 31;
    /* relies on >> of a negative int being an arithmetic shift */
    lo >>= src2;
    hi >>= src2;

    return (int) ((((unsigned int) hi & 0xffff) << 16) | ((unsigned int) lo & 0xffff));
  }
  1095. static inline int _smpy (int src1, int src2) {
  1096. unsigned long long result;
  1097. result = (((short) src1 * (short) src2) << 1);
  1098. if ((result & 0xffffffff) == 0x80000000){
  1099. result = 0x7fffffff;
  1100. _overflow = 1;
  1101. }
  1102. return (int) (result);
  1103. }
  1104. static inline int _smpyh (int src1, int src2) {
  1105. unsigned long long result;
  1106. result = ((short) (src1 >> 16) * (short) (src2 >> 16)) << 1;
  1107. if ((result & 0xffffffff) == 0x80000000){
  1108. result = 0x7fffffff;
  1109. _overflow = 1;
  1110. }
  1111. return (int) (result);
  1112. }
  1113. static inline int _smpyhl (int src1, int src2) {
  1114. unsigned long long result;
  1115. result = ((short) (src1 >> 16) * (short) (src2)) << 1;
  1116. if ((result & 0xffffffff) == 0x80000000){
  1117. result = 0x7fffffff;
  1118. _overflow = 1;
  1119. }
  1120. return (int) (result);
  1121. }
  1122. static inline int _smpylh (int src1, int src2) {
  1123. unsigned long long result;
  1124. result = ((short) (src1) * (short) (src2 >> 16)) << 1;
  1125. if ((result & 0xffffffff) == 0x80000000){
  1126. result = 0x7fffffff;
  1127. _overflow = 1;
  1128. }
  1129. return (int) (result);
  1130. }
  1131. static inline long long _smpy2ll (int src1, int src2) {
  1132. short ls1 = src1 & 0XFFFF;
  1133. short hs1 = src1 >> 16;
  1134. short ls2 = src2 & 0XFFFF;
  1135. short hs2 = src2 >> 16;
  1136. unsigned long long hi = (hs1 * hs2) << 1;
  1137. unsigned long long lo = ((ls1 * ls2) << 1) & 0xFFFFFFFF;
  1138. if ((hi & 0xffffffff) == 0x80000000){
  1139. hi = 0x7fffffff;
  1140. _overflow = 1;
  1141. }
  1142. if ((lo & 0xffffffff) == 0x80000000){
  1143. lo = 0x7fffffff;
  1144. _overflow = 1;
  1145. }
  1146. return (hi << 32) | lo;
  1147. }
  /*
   * _smpy32 - upper 32 bits of the doubled signed 32x32 product.
   *
   * src1 and src2 are multiplied into a 64-bit product, the product is
   * doubled, and bits 63..32 of that value are returned.
   *
   * When both inputs are 0x80000000 the doubled product does not fit in 64
   * signed bits; 0x7fffffff is returned for that case.
   * NOTE(review): the C6x hardware reportedly also raises its saturation
   * flag for that input; this emulation has never touched _overflow in
   * this function - confirm whether it should.
   */
  static inline int _smpy32(int src1, int src2)
  {
    const long long product = (long long) src1 * src2;

    /* Only INT_MIN * INT_MIN reaches 2^62; doubling it would overflow. */
    if (product == (1LL << 62))
      return 0x7fffffff;

    /* Multiply instead of "<<= 1": left-shifting a negative value is
       undefined.  The right shift relies on >> of a negative value being
       arithmetic. */
    return (int) ((product * 2) >> 32);
  }
  /*
   * TEN_satu8 - clamp a signed 16-bit value to the unsigned 8-bit range.
   *
   * Returns 0 for negative inputs, 0xff for inputs above 0xff and the input
   * itself otherwise.
   */
  static inline unsigned char TEN_satu8 (short src)
  {
    if (src < 0)
      return 0;
    if (src > 0xff)
      return 0xff;
    return src;
  }
  /*
   * _spack2 - pack two values into the 16-bit halves of one word.
   *
   * Each argument is first passed through util_saturate_n_no_state(x, 16)
   * (defined elsewhere in this header; presumably a 16-bit signed
   * saturation that leaves _overflow alone - confirm there).  The result
   * for src1 goes to bits 31..16 and the result for src2 to bits 15..0.
   */
  static inline int _spack2 (int src1, int src2) {
    const short upper = (short) util_saturate_n_no_state(src1,16);
    const short lower = (short) util_saturate_n_no_state(src2,16);
    const unsigned int high_part = (unsigned int) upper << 16;
    const int low_part = ((int) lower) & 0xFFFF;

    return high_part | low_part;
  }
  /*
   * _spacku4 - saturate four signed 16-bit values to unsigned bytes and pack
   * them into one word.
   *
   * The halves of src1 supply the two upper bytes and the halves of src2
   * the two lower bytes; each half is clamped with TEN_satu8().  Byte
   * order, most significant first: src1 high, src1 low, src2 high, src2
   * low.
   */
  static inline unsigned int _spacku4 (int src1, int src2) {
    const unsigned int byte0 = TEN_satu8((short) src2);
    const unsigned int byte1 = TEN_satu8((short) (src2 >> 16));
    const unsigned int byte2 = TEN_satu8((short) src1);
    const unsigned int byte3 = TEN_satu8((short) (src1 >> 16));

    return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
  }
  /*
   * _sshl - saturating left shift of a 32-bit value.
   *
   * The low 15 bits of src2 are taken as the shift count and handed,
   * negated, to util_shift_right_saturate_n() with a width of 32 (that
   * helper is defined elsewhere in this header; presumably a negative
   * right-shift count means a saturating left shift - confirm there).
   */
  static inline int _sshl (int src1, unsigned int src2) {
    const short left_count = (short)(src2 & 0x7FFF);
    const int right_count = -left_count;

    return (int) util_shift_right_saturate_n(src1, right_count, 32);
  }
  /*
   * _sshvl - variable saturating shift left.
   *
   * src1 is clamped to -31..31 and handed, negated, to
   * util_shift_right_saturate_n() with a width of 32 (that helper is
   * defined elsewhere in this header; presumably a negative right-shift
   * count means a saturating left shift - confirm there).
   */
  static inline int _sshvl (int src2, int src1) {
    const short amount = (short) ((src1 > 31) ? 31 : ((src1 < -31) ? -31 : src1));

    return (int) util_shift_right_saturate_n(src2, -amount, 32);
  }
  /*
   * _sshvr - variable saturating shift right.
   *
   * src1 is clamped to -31..31 and handed to util_shift_right_saturate_n()
   * with a width of 32 (that helper is defined elsewhere in this header;
   * its exact behaviour for negative counts should be confirmed there).
   */
  static inline int _sshvr (int src2, int src1) {
    const short amount = (short) ((src1 > 31) ? 31 : ((src1 < -31) ? -31 : src1));

    return (int) util_shift_right_saturate_n(src2, amount, 32);
  }
  1204. static inline int _ssub(int src1, int src2) {
  1205. signed long long res;
  1206. signed long long maxv, minv;
  1207. maxv = (1LL << (32-1)) - 1;
  1208. minv = (-1LL << (32-1));
  1209. res = (long long) src1 - (long long) src2;
  1210. if (res > maxv) {
  1211. res = maxv;
  1212. _overflow = 1;
  1213. }
  1214. else if (res < minv ) {
  1215. res = minv;
  1216. _overflow = 1;
  1217. }
  1218. return (int) res;
  1219. }
  /*
   * _ssub2 - saturating subtraction of two pairs of signed 16-bit values.
   *
   * The low half (bits 15..0) of src2 is subtracted from the low half of
   * src1, and likewise for the high halves (bits 31..16).  Each difference
   * is clamped to the signed 16-bit range and packed back into the matching
   * half of the return value.  _overflow is not modified.
   */
  static inline int _ssub2(int src1, int src2) {
    const int maxv = (1 << (16-1)) - 1;
    const int minv = -(1 << (16-1));
    const unsigned int u1 = (unsigned int) src1;
    const unsigned int u2 = (unsigned int) src2;
    unsigned int packed = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
      const unsigned int shift = 16u * (unsigned int) lane;
      /* Sign-extend each 16-bit field arithmetically; this avoids punning an
         int through a short array (aliasing and byte-order dependent). */
      int a = (int) (((u1 >> shift) & 0xffff) ^ 0x8000) - 0x8000;
      int b = (int) (((u2 >> shift) & 0xffff) ^ 0x8000) - 0x8000;
      int diff = a - b;

      /* NOTE: TI c6x does NOT set the overflow register even if results saturate */
      if (diff > maxv)
        diff = maxv;
      else if (diff < minv)
        diff = minv;

      /* Pack in unsigned arithmetic so the high lane never shifts into the
         sign bit of an int. */
      packed |= ((unsigned int) diff & 0xffff) << shift;
    }
    return (int) packed;
  }
  /*
   * _subabs4 - absolute difference of four packed unsigned bytes.
   *
   * For every byte position the result byte is |src1 byte - src2 byte|,
   * with both bytes taken as unsigned values.
   */
  static inline int _subabs4 (int src1, int src2) {
    const unsigned int u1 = (unsigned int) src1;
    const unsigned int u2 = (unsigned int) src2;
    unsigned int packed = 0;
    unsigned int shift;

    for (shift = 0; shift < 32; shift += 8) {
      const unsigned int a = (u1 >> shift) & 0xff;
      const unsigned int b = (u2 >> shift) & 0xff;
      /* Subtract the smaller from the larger so no negative intermediate is
         needed, and pack unsigned so a top byte of 0x80 or more does not
         shift into the sign bit of an int. */
      const unsigned int diff = (a > b) ? (a - b) : (b - a);

      packed |= diff << shift;
    }
    return (int) packed;
  }
  /*
   * _subc - conditional subtract and shift (one step of a restoring divide).
   *
   * If src1 >= src2, returns ((src1 - src2) << 1) + 1; otherwise returns
   * src1 << 1.
   */
  static inline unsigned int _subc (unsigned int src1, unsigned int src2)
  {
    if (src1 < src2)
      return src1 << 1;

    return ((src1 - src2) << 1) + 1;
  }
  /*
   * _sub2 - wrapping subtraction of two pairs of 16-bit values.
   *
   * The low half (bits 15..0) of src2 is subtracted from the low half of
   * src1, and likewise for the high halves (bits 31..16).  Each difference
   * is truncated to 16 bits (no saturation, no borrow between halves) and
   * stored in the matching half of the result.
   */
  static inline int _sub2(int src1, int src2) {
    const unsigned int u1 = (unsigned int) src1;
    const unsigned int u2 = (unsigned int) src2;
    /* Modular unsigned arithmetic on the extracted halves gives the same
       16-bit results as the signed subtraction, without punning an int
       through a short array (aliasing and byte-order dependent). */
    const unsigned int lo = ((u1 & 0xffff) - (u2 & 0xffff)) & 0xffff;
    const unsigned int hi = ((u1 >> 16) - (u2 >> 16)) & 0xffff;

    return (int) ((hi << 16) | lo);
  }
  /*
   * _sub4 - wrapping subtraction of four packed bytes.
   *
   * Every byte of src2 is subtracted from the byte of src1 in the same
   * position.  Each difference is truncated to 8 bits (no saturation, no
   * borrow between bytes) and stored in the matching byte of the result.
   */
  static inline int _sub4(int src1, int src2) {
    const unsigned int u1 = (unsigned int) src1;
    const unsigned int u2 = (unsigned int) src2;
    unsigned int packed = 0;
    unsigned int shift;

    /* Bytes are extracted with shifts rather than by storing an int through
       a char array, which was an aliasing violation and could be
       misaligned; modular unsigned arithmetic also removes the dependence
       on whether plain char is signed. */
    for (shift = 0; shift < 32; shift += 8) {
      const unsigned int diff = (((u1 >> shift) & 0xff) - ((u2 >> shift) & 0xff)) & 0xff;
      packed |= diff << shift;
    }
    return (int) packed;
  }
  /*
   * _swap4 - swap the two bytes inside each 16-bit half of a word.
   *
   * Byte 0 is exchanged with byte 1 and byte 2 with byte 3.
   */
  static inline int _swap4 (unsigned int src1) {
    /* Done on the unsigned word: promoting single bytes to int and shifting
       them by 24 overflows int whenever the byte is 0x80 or more. */
    const unsigned int moved_up = (src1 & 0x00ff00ffu) << 8;
    const unsigned int moved_down = (src1 >> 8) & 0x00ff00ffu;

    return (int) (moved_up | moved_down);
  }
  /*
   * _unpkhu4 - unpack the two upper bytes into zero-extended 16-bit halves.
   *
   * Byte 3 of src1 is placed in bits 23..16 of the result and byte 2 in
   * bits 7..0; all other result bits are zero.
   */
  static inline unsigned int _unpkhu4 (unsigned int src1) {
    const unsigned int upper_half = (src1 >> 24) << 16;
    const unsigned int lower_half = (src1 >> 16) & 0xff;

    return upper_half | lower_half;
  }
  /*
   * _unpklu4 - unpack the two lower bytes into zero-extended 16-bit halves.
   *
   * Byte 1 of src1 is placed in bits 23..16 of the result and byte 0 in
   * bits 7..0; all other result bits are zero.
   */
  static inline unsigned int _unpklu4 (unsigned int src1) {
    const unsigned int upper_half = ((src1 >> 8) & 0xff) << 16;
    const unsigned int lower_half = src1 & 0xff;

    return upper_half | lower_half;
  }
  /*
   * _xpnd2 - expand the two low bits of src1 into 16-bit masks.
   *
   * Bit 0 selects the low half (0x0000ffff) and bit 1 the high half
   * (0xffff0000); a half is all ones when its bit is set and all zeros
   * otherwise.  Higher bits of src1 are ignored.
   */
  static inline unsigned int _xpnd2 (unsigned int src1) {
    unsigned int mask = 0;

    if (src1 & 0x1)
      mask |= 0x0000ffff;
    if (src1 & 0x2)
      mask |= 0xffff0000;
    return mask;
  }
  /*
   * _xpnd4 - expand the four low bits of src1 into byte masks.
   *
   * Bit n (n = 0..3) selects byte n of the result; a byte is 0xff when its
   * bit is set and 0x00 otherwise.  Higher bits of src1 are ignored.
   */
  static inline unsigned int _xpnd4 (unsigned int src1) {
    unsigned int mask = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
      if ((src1 >> lane) & 0x1)
        mask |= 0xffu << (8 * lane);
    }
    return mask;
  }
  1335. // end of Implemented in alphabetical order
  1336. #endif /* __C6X_COMPAT__H */