arm_boolean_distance_template.h 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_boolean_distance_template.h
  4. * Description: Templates for boolean distances
  5. *
  6. *
  7. * Target Processor: Cortex-M cores
  8. * -------------------------------------------------------------------- */
  9. /*
  10. * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  11. *
  12. * SPDX-License-Identifier: Apache-2.0
  13. *
  14. * Licensed under the Apache License, Version 2.0 (the License); you may
  15. * not use this file except in compliance with the License.
  16. * You may obtain a copy of the License at
  17. *
  18. * www.apache.org/licenses/LICENSE-2.0
  19. *
  20. * Unless required by applicable law or agreed to in writing, software
  21. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  22. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23. * See the License for the specific language governing permissions and
  24. * limitations under the License.
  25. */
  26. /**
  27. * @defgroup DISTANCEF Distance Functions
  28. *
  29. * Computes Distances between vectors.
  30. *
  31. * Distance functions are useful in a lot of algorithms.
  32. *
  33. */
  34. /**
  35. * @addtogroup DISTANCEF
  36. * @{
  37. */
  38. /**
  39. * @brief Elements of boolean distances
  40. *
  41. * Different values which are used to compute boolean distances
  42. *
  43. * @param[in] pA First vector of packed booleans
  44. * @param[in] pB Second vector of packed booleans
  45. * @param[in] numberOfBools Number of booleans
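  46. * @param[out] cTT Number of positions where both pA and pB are true (present only when TT is defined)
  47. * @param[out] cTF Number of positions where pA is true and pB is false (present only when TF is defined)
  48. * @param[out] cFT Number of positions where pA is false and pB is true (present only when FT is defined)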
  49. * @return None
  50. *
  51. */
  /*
   * Function-name builder for this template.
   *
   * FUNC(EXT) yields the identifier arm_boolean_distance followed by the
   * expansion of EXT.  The extra _FUNC level is required so that the argument
   * is macro-expanded before the ## operator pastes it; pasting directly in
   * FUNC would append the literal token "EXT".
   *
   * NOTE(review): EXT (and the TT / FF / TF / FT selectors used below) are not
   * defined in this file; presumably the including source defines them before
   * including the template -- confirm against the includers.
   */
  52. #define _FUNC(A,B) A##B
  53. #define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
  54. #if defined(ARM_MATH_MVEI)
  55. #include "arm_common_tables.h"
  56. void FUNC(EXT)(const uint32_t *pA
  57. , const uint32_t *pB
  58. , uint32_t numberOfBools
  59. #ifdef TT
  60. , uint32_t *cTT
  61. #endif
  62. #ifdef FF
  63. , uint32_t *cFF
  64. #endif
  65. #ifdef TF
  66. , uint32_t *cTF
  67. #endif
  68. #ifdef FT
  69. , uint32_t *cFT
  70. #endif
  71. )
  72. {
  73. #ifdef TT
  74. uint32_t _ctt=0;
  75. #endif
  76. #ifdef FF
  77. uint32_t _cff=0;
  78. #endif
  79. #ifdef TF
  80. uint32_t _ctf=0;
  81. #endif
  82. #ifdef FT
  83. uint32_t _cft=0;
  84. #endif
  85. uint32_t a, b, ba, bb;
  86. int shift;
  87. const uint8_t *pA8 = (const uint8_t *) pA;
  88. const uint8_t *pB8 = (const uint8_t *) pB;
  89. /* handle vector blocks */
  90. uint32_t blkCnt = numberOfBools / 128;
  91. while (blkCnt > 0U) {
  92. uint8x16_t vecA = vld1q((const uint8_t *) pA8);
  93. uint8x16_t vecB = vld1q((const uint8_t *) pB8);
  94. #ifdef TT
  95. uint8x16_t vecTT = vecA & vecB;
  96. vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
  97. _ctt += vaddvq(vecTT);
  98. #endif
  99. #ifdef FF
  100. uint8x16_t vecFF = vmvnq(vecA) & vmvnq(vecB);
  101. vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
  102. _cff += vaddvq(vecFF);
  103. #endif
  104. #ifdef TF
  105. uint8x16_t vecTF = vecA & vmvnq(vecB);
  106. vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
  107. _ctf += vaddvq(vecTF);
  108. #endif
  109. #ifdef FT
  110. uint8x16_t vecFT = vmvnq(vecA) & vecB;
  111. vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
  112. _cft += vaddvq(vecFT);
  113. #endif
  114. pA8 += 16;
  115. pB8 += 16;
  116. blkCnt--;
  117. }
  118. pA = (const uint32_t *)pA8;
  119. pB = (const uint32_t *)pB8;
  120. blkCnt = numberOfBools & 0x7F;
  121. while(blkCnt >= 32)
  122. {
  123. a = *pA++;
  124. b = *pB++;
  125. shift = 0;
  126. while(shift < 32)
  127. {
  128. ba = a & 1;
  129. bb = b & 1;
  130. a = a >> 1;
  131. b = b >> 1;
  132. #ifdef TT
  133. _ctt += (ba && bb);
  134. #endif
  135. #ifdef FF
  136. _cff += ((1 ^ ba) && (1 ^ bb));
  137. #endif
  138. #ifdef TF
  139. _ctf += (ba && (1 ^ bb));
  140. #endif
  141. #ifdef FT
  142. _cft += ((1 ^ ba) && bb);
  143. #endif
  144. shift ++;
  145. }
  146. blkCnt -= 32;
  147. }
  148. a = *pA++;
  149. b = *pB++;
  150. a = a >> (32 - blkCnt);
  151. b = b >> (32 - blkCnt);
  152. while(blkCnt > 0)
  153. {
  154. ba = a & 1;
  155. bb = b & 1;
  156. a = a >> 1;
  157. b = b >> 1;
  158. #ifdef TT
  159. _ctt += (ba && bb);
  160. #endif
  161. #ifdef FF
  162. _cff += ((1 ^ ba) && (1 ^ bb));
  163. #endif
  164. #ifdef TF
  165. _ctf += (ba && (1 ^ bb));
  166. #endif
  167. #ifdef FT
  168. _cft += ((1 ^ ba) && bb);
  169. #endif
  170. blkCnt --;
  171. }
  172. #ifdef TT
  173. *cTT = _ctt;
  174. #endif
  175. #ifdef FF
  176. *cFF = _cff;
  177. #endif
  178. #ifdef TF
  179. *cTF = _ctf;
  180. #endif
  181. #ifdef FT
  182. *cFT = _cft;
  183. #endif
  184. }
  185. #else
  186. #if defined(ARM_MATH_NEON)
  187. void FUNC(EXT)(const uint32_t *pA
  188. , const uint32_t *pB
  189. , uint32_t numberOfBools
  190. #ifdef TT
  191. , uint32_t *cTT
  192. #endif
  193. #ifdef FF
  194. , uint32_t *cFF
  195. #endif
  196. #ifdef TF
  197. , uint32_t *cTF
  198. #endif
  199. #ifdef FT
  200. , uint32_t *cFT
  201. #endif
  202. )
  203. {
  204. #ifdef TT
  205. uint32_t _ctt=0;
  206. #endif
  207. #ifdef FF
  208. uint32_t _cff=0;
  209. #endif
  210. #ifdef TF
  211. uint32_t _ctf=0;
  212. #endif
  213. #ifdef FT
  214. uint32_t _cft=0;
  215. #endif
  216. uint32_t nbBoolBlock;
  217. uint32_t a,b,ba,bb;
  218. int shift;
  219. uint32x4_t aV, bV;
  220. #ifdef TT
  221. uint32x4_t cttV;
  222. #endif
  223. #ifdef FF
  224. uint32x4_t cffV;
  225. #endif
  226. #ifdef TF
  227. uint32x4_t ctfV;
  228. #endif
  229. #ifdef FT
  230. uint32x4_t cftV;
  231. #endif
  232. uint8x16_t tmp;
  233. uint16x8_t tmp2;
  234. uint32x4_t tmp3;
  235. uint64x2_t tmp4;
  236. #ifdef TT
  237. uint64x2_t tmp4tt;
  238. #endif
  239. #ifdef FF
  240. uint64x2_t tmp4ff;
  241. #endif
  242. #ifdef TF
  243. uint64x2_t tmp4tf;
  244. #endif
  245. #ifdef FT
  246. uint64x2_t tmp4ft;
  247. #endif
  248. #ifdef TT
  249. tmp4tt = vdupq_n_u64(0);
  250. #endif
  251. #ifdef FF
  252. tmp4ff = vdupq_n_u64(0);
  253. #endif
  254. #ifdef TF
  255. tmp4tf = vdupq_n_u64(0);
  256. #endif
  257. #ifdef FT
  258. tmp4ft = vdupq_n_u64(0);
  259. #endif
  260. nbBoolBlock = numberOfBools >> 7;
  261. while(nbBoolBlock > 0)
  262. {
  263. aV = vld1q_u32(pA);
  264. bV = vld1q_u32(pB);
  265. pA += 4;
  266. pB += 4;
  267. #ifdef TT
  268. cttV = vandq_u32(aV,bV);
  269. #endif
  270. #ifdef FF
  271. cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
  272. #endif
  273. #ifdef TF
  274. ctfV = vandq_u32(aV,vmvnq_u32(bV));
  275. #endif
  276. #ifdef FT
  277. cftV = vandq_u32(vmvnq_u32(aV),bV);
  278. #endif
  279. #ifdef TT
  280. tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
  281. tmp2 = vpaddlq_u8(tmp);
  282. tmp3 = vpaddlq_u16(tmp2);
  283. tmp4 = vpaddlq_u32(tmp3);
  284. tmp4tt = vaddq_u64(tmp4tt, tmp4);
  285. #endif
  286. #ifdef FF
  287. tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
  288. tmp2 = vpaddlq_u8(tmp);
  289. tmp3 = vpaddlq_u16(tmp2);
  290. tmp4 = vpaddlq_u32(tmp3);
  291. tmp4ff = vaddq_u64(tmp4ff, tmp4);
  292. #endif
  293. #ifdef TF
  294. tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
  295. tmp2 = vpaddlq_u8(tmp);
  296. tmp3 = vpaddlq_u16(tmp2);
  297. tmp4 = vpaddlq_u32(tmp3);
  298. tmp4tf = vaddq_u64(tmp4tf, tmp4);
  299. #endif
  300. #ifdef FT
  301. tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
  302. tmp2 = vpaddlq_u8(tmp);
  303. tmp3 = vpaddlq_u16(tmp2);
  304. tmp4 = vpaddlq_u32(tmp3);
  305. tmp4ft = vaddq_u64(tmp4ft, tmp4);
  306. #endif
  307. nbBoolBlock --;
  308. }
  309. #ifdef TT
  310. _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
  311. #endif
  312. #ifdef FF
  313. _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
  314. #endif
  315. #ifdef TF
  316. _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
  317. #endif
  318. #ifdef FT
  319. _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
  320. #endif
  321. nbBoolBlock = numberOfBools & 0x7F;
  322. while(nbBoolBlock >= 32)
  323. {
  324. a = *pA++;
  325. b = *pB++;
  326. shift = 0;
  327. while(shift < 32)
  328. {
  329. ba = a & 1;
  330. bb = b & 1;
  331. a = a >> 1;
  332. b = b >> 1;
  333. #ifdef TT
  334. _ctt += (ba && bb);
  335. #endif
  336. #ifdef FF
  337. _cff += ((1 ^ ba) && (1 ^ bb));
  338. #endif
  339. #ifdef TF
  340. _ctf += (ba && (1 ^ bb));
  341. #endif
  342. #ifdef FT
  343. _cft += ((1 ^ ba) && bb);
  344. #endif
  345. shift ++;
  346. }
  347. nbBoolBlock -= 32;
  348. }
  349. a = *pA++;
  350. b = *pB++;
  351. a = a >> (32 - nbBoolBlock);
  352. b = b >> (32 - nbBoolBlock);
  353. while(nbBoolBlock > 0)
  354. {
  355. ba = a & 1;
  356. bb = b & 1;
  357. a = a >> 1;
  358. b = b >> 1;
  359. #ifdef TT
  360. _ctt += (ba && bb);
  361. #endif
  362. #ifdef FF
  363. _cff += ((1 ^ ba) && (1 ^ bb));
  364. #endif
  365. #ifdef TF
  366. _ctf += (ba && (1 ^ bb));
  367. #endif
  368. #ifdef FT
  369. _cft += ((1 ^ ba) && bb);
  370. #endif
  371. nbBoolBlock --;
  372. }
  373. #ifdef TT
  374. *cTT = _ctt;
  375. #endif
  376. #ifdef FF
  377. *cFF = _cff;
  378. #endif
  379. #ifdef TF
  380. *cTF = _ctf;
  381. #endif
  382. #ifdef FT
  383. *cFT = _cft;
  384. #endif
  385. }
  386. #else
  387. void FUNC(EXT)(const uint32_t *pA
  388. , const uint32_t *pB
  389. , uint32_t numberOfBools
  390. #ifdef TT
  391. , uint32_t *cTT
  392. #endif
  393. #ifdef FF
  394. , uint32_t *cFF
  395. #endif
  396. #ifdef TF
  397. , uint32_t *cTF
  398. #endif
  399. #ifdef FT
  400. , uint32_t *cFT
  401. #endif
  402. )
  403. {
  404. #ifdef TT
  405. uint32_t _ctt=0;
  406. #endif
  407. #ifdef FF
  408. uint32_t _cff=0;
  409. #endif
  410. #ifdef TF
  411. uint32_t _ctf=0;
  412. #endif
  413. #ifdef FT
  414. uint32_t _cft=0;
  415. #endif
  416. uint32_t a,b,ba,bb;
  417. int shift;
  418. while(numberOfBools >= 32)
  419. {
  420. a = *pA++;
  421. b = *pB++;
  422. shift = 0;
  423. while(shift < 32)
  424. {
  425. ba = a & 1;
  426. bb = b & 1;
  427. a = a >> 1;
  428. b = b >> 1;
  429. #ifdef TT
  430. _ctt += (ba && bb);
  431. #endif
  432. #ifdef FF
  433. _cff += ((1 ^ ba) && (1 ^ bb));
  434. #endif
  435. #ifdef TF
  436. _ctf += (ba && (1 ^ bb));
  437. #endif
  438. #ifdef FT
  439. _cft += ((1 ^ ba) && bb);
  440. #endif
  441. shift ++;
  442. }
  443. numberOfBools -= 32;
  444. }
  445. a = *pA++;
  446. b = *pB++;
  447. a = a >> (32 - numberOfBools);
  448. b = b >> (32 - numberOfBools);
  449. while(numberOfBools > 0)
  450. {
  451. ba = a & 1;
  452. bb = b & 1;
  453. a = a >> 1;
  454. b = b >> 1;
  455. #ifdef TT
  456. _ctt += (ba && bb);
  457. #endif
  458. #ifdef FF
  459. _cff += ((1 ^ ba) && (1 ^ bb));
  460. #endif
  461. #ifdef TF
  462. _ctf += (ba && (1 ^ bb));
  463. #endif
  464. #ifdef FT
  465. _cft += ((1 ^ ba) && bb);
  466. #endif
  467. numberOfBools --;
  468. }
  469. #ifdef TT
  470. *cTT = _ctt;
  471. #endif
  472. #ifdef FF
  473. *cFF = _cff;
  474. #endif
  475. #ifdef TF
  476. *cTF = _ctf;
  477. #endif
  478. #ifdef FT
  479. *cFT = _cft;
  480. #endif
  481. }
  482. #endif
  483. #endif /* defined(ARM_MATH_MVEI) */
  484. /**
  485. * @} end of DISTANCEF group
  486. */