/* UnaryTestsF64.cpp */
  1. #include "UnaryTestsF64.h"
  2. #include "Error.h"
  /* Threshold handed to ASSERT_SNR by the generic matrix tests. */
  #define SNR_THRESHOLD 120

  /*
   * Tolerances handed to ASSERT_CLOSE_ERROR by the generic matrix tests.
   * The reference patterns are generated with a double precision computation.
   */
  #define REL_ERROR (1.0e-6)
  #define ABS_ERROR (1.0e-5)

  /* Dedicated threshold and tolerances for the Cholesky comparison. */
  #define SNR_THRESHOLD_CHOL 270
  #define REL_ERROR_CHOL (1.0e-9)
  #define ABS_ERROR_CHOL (1.0e-9)

  /* Tolerances for the LDLT comparison. */
  #define REL_ERROR_LDLT (1.0e-5)
  #define ABS_ERROR_LDLT (1.0e-5)

  /* Upper bound of the maximum matrix dimension used by Python. */
  #define MAXMATRIXDIM 40
  /*
   * Declares the locals used by a two-operand test body.
   *
   * Expects `input1`, `input2`, `a`, `b`, `output` and `dims` to be visible
   * at the expansion site. Introduces:
   *   inp1, inp2 : read-only cursors on the two input patterns
   *   ap, bp     : scratch buffers the operands are copied into
   *   outp       : write cursor on the output buffer
   *   dimsp      : cursor on the dimension list
   *   nbMatrixes : half the number of entries in `dims`
   *   rows, columns, i : left uninitialised for the caller's loop
   */
  #define LOADDATA2()                               \
      const float64_t *inp1 = input1.ptr();         \
      const float64_t *inp2 = input2.ptr();         \
      float64_t *ap = a.ptr();                      \
      float64_t *bp = b.ptr();                      \
      float64_t *outp = output.ptr();               \
      int16_t *dimsp = dims.ptr();                  \
      int nbMatrixes = dims.nbSamples() >> 1;       \
      int rows;                                     \
      int columns;                                  \
      int i;
  /*
   * Declares the locals used by a single-operand test body.
   *
   * Expects `input1`, `a`, `output` and `dims` to be visible at the
   * expansion site. Introduces:
   *   inp1       : read-only cursor on the input pattern
   *   ap         : scratch buffer the operand is copied into
   *   outp       : write cursor on the output buffer
   *   dimsp      : cursor on the dimension list
   *   nbMatrixes : half the number of entries in `dims`
   *   rows, columns, i : left uninitialised for the caller's loop
   */
  #define LOADDATA1()                               \
      const float64_t *inp1 = input1.ptr();         \
      float64_t *ap = a.ptr();                      \
      float64_t *outp = output.ptr();               \
      int16_t *dimsp = dims.ptr();                  \
      int nbMatrixes = dims.nbSamples() >> 1;       \
      int rows;                                     \
      int columns;                                  \
      int i;
  /*
   * Sets up `in1`, `in2` and `out` for a two-operand call on a
   * rows x columns problem.
   *
   * The operands are first copied from `inp1` / `inp2` into the scratch
   * buffers `ap` / `bp`, and the matrix instances point at those copies.
   * `out` writes through `outp`.
   * Requires rows, columns, inp1, inp2, ap, bp and outp at the expansion site.
   */
  #define PREPAREDATA2()                                        \
      in1.numRows = rows;                                       \
      in1.numCols = columns;                                    \
      memcpy(ap, inp1, sizeof(float64_t) * rows * columns);     \
      in1.pData = ap;                                           \
      in2.numRows = rows;                                       \
      in2.numCols = columns;                                    \
      memcpy(bp, inp2, sizeof(float64_t) * rows * columns);     \
      in2.pData = bp;                                           \
      out.numRows = rows;                                       \
      out.numCols = columns;                                    \
      out.pData = outp;
  /*
   * Sets up `in1` and `out` for a single-operand call on a rows x columns
   * input.
   *
   * The operand is copied from `inp1` into the scratch buffer `ap`, and
   * `in1` points at that copy. When TRANSPOSED is true the output instance
   * gets swapped dimensions (columns x rows); otherwise it mirrors the input.
   * Requires rows, columns, inp1, ap and outp at the expansion site.
   */
  #define PREPAREDATA1(TRANSPOSED)                              \
      in1.numRows = rows;                                       \
      in1.numCols = columns;                                    \
      memcpy(ap, inp1, sizeof(float64_t) * rows * columns);     \
      in1.pData = ap;                                           \
      out.numRows = (TRANSPOSED) ? columns : rows;              \
      out.numCols = (TRANSPOSED) ? rows : columns;              \
      out.pData = outp;
  /*
   * Sets up `in1` and `outll` for the LDLT test on a rows x columns input.
   *
   * The operand is copied from `inp1` into the scratch buffer `ap`, and
   * `in1` points at that copy. `outll` gets the same dimensions and writes
   * through `outllp`.
   * Requires rows, columns, inp1, ap and outllp at the expansion site.
   */
  #define PREPAREDATALL1()                                      \
      in1.numRows = rows;                                       \
      in1.numCols = columns;                                    \
      memcpy(ap, inp1, sizeof(float64_t) * rows * columns);     \
      in1.pData = ap;                                           \
      outll.numRows = rows;                                     \
      outll.numCols = columns;                                  \
      outll.pData = outllp;
  /*
   * Swaps rows `i` and `j` of the row-major matrix `A`, in place.
   *
   * The row length is taken from an integer named `n` that must be in scope
   * where the macro is expanded.
   *
   * Every argument is parenthesised in the expansion so that expression
   * arguments such as `k - 1` index the intended row instead of being
   * re-associated with the `* n` that follows.
   *
   * Caveats: the arguments are evaluated several times per element, so they
   * must be free of side effects, and they must not mention the macro's own
   * locals `w` and `tmp`.
   */
  #define SWAP_ROWS(A,i,j)                          \
      for(int w=0;w < n; w++)                       \
      {                                             \
          float64_t tmp;                            \
          tmp = (A)[(i)*n + w];                     \
          (A)[(i)*n + w] = (A)[(j)*n + w];          \
          (A)[(j)*n + w] = tmp;                     \
      }
  92. void UnaryTestsF64::test_mat_add_f64()
  93. {
  94. }
  95. void UnaryTestsF64::test_mat_sub_f64()
  96. {
  97. LOADDATA2();
  98. arm_status status;
  99. for(i=0;i < nbMatrixes ; i ++)
  100. {
  101. rows = *dimsp++;
  102. columns = *dimsp++;
  103. PREPAREDATA2();
  104. status=arm_mat_sub_f64(&this->in1,&this->in2,&this->out);
  105. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  106. outp += (rows * columns);
  107. }
  108. ASSERT_EMPTY_TAIL(output);
  109. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  110. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  111. }
  112. void UnaryTestsF64::test_mat_scale_f64()
  113. {
  114. }
  115. void UnaryTestsF64::test_mat_trans_f64()
  116. {
  117. LOADDATA1();
  118. arm_status status;
  119. for(i=0;i < nbMatrixes ; i ++)
  120. {
  121. rows = *dimsp++;
  122. columns = *dimsp++;
  123. PREPAREDATA1(true);
  124. status=arm_mat_trans_f64(&this->in1,&this->out);
  125. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  126. outp += (rows * columns);
  127. }
  128. ASSERT_EMPTY_TAIL(output);
  129. ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
  130. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  131. }
  /*
   * The test framework only adds 16 bytes of free memory after the end of
   * a buffer, so the out-of-buffer write check is limited to 2 float64
   * values.
   */

  /* Writes the two sentinel values (1.0 then -2.0) starting at `b`. */
  static void refInnerTail(float64_t *b)
  {
      float64_t *slot = b;

      *slot++ = 1.0;
      *slot = -2.0;
  }
  141. static void checkInnerTail(float64_t *b)
  142. {
  143. ASSERT_TRUE(b[0] == 1.0);
  144. ASSERT_TRUE(b[1] == -2.0);
  145. }
  146. void UnaryTestsF64::test_mat_inverse_f64()
  147. {
  148. const float64_t *inp1=input1.ptr();
  149. float64_t *ap=a.ptr();
  150. float64_t *outp=output.ptr();
  151. int16_t *dimsp = dims.ptr();
  152. int nbMatrixes = dims.nbSamples();
  153. int rows,columns;
  154. int i;
  155. arm_status status;
  156. for(i=0;i < nbMatrixes ; i ++)
  157. {
  158. rows = *dimsp++;
  159. columns = rows;
  160. PREPAREDATA1(false);
  161. refInnerTail(outp+(rows * columns));
  162. status=arm_mat_inverse_f64(&this->in1,&this->out);
  163. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  164. outp += (rows * columns);
  165. inp1 += (rows * columns);
  166. checkInnerTail(outp);
  167. }
  168. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  169. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  170. }
  171. void UnaryTestsF64::test_mat_cholesky_dpo_f64()
  172. {
  173. float64_t *ap=a.ptr();
  174. const float64_t *inp1=input1.ptr();
  175. float64_t *outp=output.ptr();
  176. int16_t *dimsp = dims.ptr();
  177. int nbMatrixes = dims.nbSamples();
  178. int rows,columns;
  179. int i;
  180. arm_status status;
  181. for(i=0;i < nbMatrixes ; i ++)
  182. {
  183. rows = *dimsp++;
  184. columns = rows;
  185. PREPAREDATA1(false);
  186. status=arm_mat_cholesky_f64(&this->in1,&this->out);
  187. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  188. outp += (rows * columns);
  189. inp1 += (rows * columns);
  190. }
  191. ASSERT_EMPTY_TAIL(output);
  192. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD_CHOL);
  193. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR_CHOL,REL_ERROR_CHOL);
  194. }
  195. void UnaryTestsF64::test_solve_upper_triangular_f64()
  196. {
  197. float64_t *ap=a.ptr();
  198. const float64_t *inp1=input1.ptr();
  199. float64_t *bp=b.ptr();
  200. const float64_t *inp2=input2.ptr();
  201. float64_t *outp=output.ptr();
  202. int16_t *dimsp = dims.ptr();
  203. int nbMatrixes = dims.nbSamples();
  204. int rows,columns;
  205. int i;
  206. arm_status status;
  207. for(i=0;i < nbMatrixes ; i ++)
  208. {
  209. rows = *dimsp++;
  210. columns = rows;
  211. PREPAREDATA2();
  212. status=arm_mat_solve_upper_triangular_f64(&this->in1,&this->in2,&this->out);
  213. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  214. outp += (rows * columns);
  215. inp1 += (rows * columns);
  216. inp2 += (rows * columns);
  217. }
  218. ASSERT_EMPTY_TAIL(output);
  219. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  220. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
  221. }
  222. void UnaryTestsF64::test_solve_lower_triangular_f64()
  223. {
  224. float64_t *ap=a.ptr();
  225. const float64_t *inp1=input1.ptr();
  226. float64_t *bp=b.ptr();
  227. const float64_t *inp2=input2.ptr();
  228. float64_t *outp=output.ptr();
  229. int16_t *dimsp = dims.ptr();
  230. int nbMatrixes = dims.nbSamples();
  231. int rows,columns;
  232. int i;
  233. arm_status status;
  234. for(i=0;i < nbMatrixes ; i ++)
  235. {
  236. rows = *dimsp++;
  237. columns = rows;
  238. PREPAREDATA2();
  239. status=arm_mat_solve_lower_triangular_f64(&this->in1,&this->in2,&this->out);
  240. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  241. outp += (rows * columns);
  242. inp1 += (rows * columns);
  243. inp2 += (rows * columns);
  244. }
  245. ASSERT_EMPTY_TAIL(output);
  246. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  247. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
  248. }
  /*
   * Writes the transpose of the n x n row-major matrix `src` into `dst`.
   *
   * src : n*n input elements
   * dst : n*n output elements
   * n   : matrix dimension; nothing is written when n <= 0
   */
  static void trans_f64(const float64_t *src, float64_t *dst, int n)
  {
      for (int row = 0; row < n; ++row)
      {
          const float64_t *srcRow = src + row * n;

          for (int col = 0; col < n; ++col)
          {
              /* Element (row, col) of src lands at (col, row) of dst. */
              dst[col * n + row] = srcRow[col];
          }
      }
  }
  /*
   * Computes dst = srcA * srcB for n x n row-major matrices.
   *
   * srcA, srcB : n*n input elements each
   * dst        : n*n output elements
   * n          : matrix dimension; nothing is written when n <= 0
   *
   * Each output element is accumulated from 0.0 with the inner index
   * running upward.
   */
  static void mult_f64_f64(const float64_t *srcA, const float64_t *srcB, float64_t *dst,int n)
  {
      for (int row = 0; row < n; ++row)
      {
          const float64_t *aRow = srcA + row * n;
          float64_t *dstRow = dst + row * n;

          for (int col = 0; col < n; ++col)
          {
              float64_t acc = 0.0;

              for (int k = 0; k < n; ++k)
              {
                  acc += aRow[k] * srcB[k * n + col];
              }

              dstRow[col] = acc;
          }
      }
  }
  274. void UnaryTestsF64::compute_ldlt_error(const int n,const int16_t *outpp)
  275. {
  276. float64_t *tmpa = tmpapat.ptr() ;
  277. float64_t *tmpb = tmpbpat.ptr() ;
  278. float64_t *tmpc = tmpcpat.ptr() ;
  279. /* Compute P A P^t */
  280. // Create identiy matrix
  281. for(int r=0; r < n; r++)
  282. {
  283. for(int c=0; c < n; c++)
  284. {
  285. if (r == c)
  286. {
  287. tmpa[r*n+c] = 1.0;
  288. }
  289. else
  290. {
  291. tmpa[r*n+c] = 0.0;
  292. }
  293. }
  294. }
  295. // Create permutation matrix
  296. for(int r=0;r < n; r++)
  297. {
  298. SWAP_ROWS(tmpa,r,outpp[r]);
  299. }
  300. trans_f64((const float64_t*)tmpa,tmpb,n);
  301. mult_f64_f64((const float64_t*)this->in1.pData,(const float64_t*)tmpb,tmpc,n);
  302. mult_f64_f64((const float64_t*)tmpa,(const float64_t*)tmpc,outa,n);
  303. /* Compute L D L^t */
  304. trans_f64((const float64_t*)this->outll.pData,tmpc,n);
  305. mult_f64_f64((const float64_t*)this->outd.pData,(const float64_t*)tmpc,tmpa,n);
  306. mult_f64_f64((const float64_t*)this->outll.pData,(const float64_t*)tmpa,outb,n);
  307. }
  308. void UnaryTestsF64::test_mat_ldl_f64()
  309. {
  310. float64_t *ap=a.ptr();
  311. const float64_t *inp1=input1.ptr();
  312. float64_t *outllp=outputll.ptr();
  313. float64_t *outdp=outputd.ptr();
  314. int16_t *outpp=outputp.ptr();
  315. outa=outputa.ptr();
  316. outb=outputb.ptr();
  317. int16_t *dimsp = dims.ptr();
  318. int nbMatrixes = dims.nbSamples();
  319. int rows,columns;
  320. int i;
  321. arm_status status;
  322. for(i=0;i < nbMatrixes ; i ++)
  323. {
  324. rows = *dimsp++;
  325. columns = rows;
  326. PREPAREDATALL1();
  327. outd.numRows=rows;
  328. outd.numCols=columns;
  329. outd.pData=outdp;
  330. memset(outpp,0,rows*sizeof(uint16_t));
  331. memset(outdp,0,columns*rows*sizeof(float64_t));
  332. status=arm_mat_ldlt_f64(&this->in1,&this->outll,&this->outd,(uint16_t*)outpp);
  333. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  334. compute_ldlt_error(rows,outpp);
  335. outllp += (rows * columns);
  336. outdp += (rows * columns);
  337. outpp += rows;
  338. outa += (rows * columns);
  339. outb +=(rows * columns);
  340. inp1 += (rows * columns);
  341. }
  342. ASSERT_EMPTY_TAIL(outputll);
  343. ASSERT_EMPTY_TAIL(outputd);
  344. ASSERT_EMPTY_TAIL(outputp);
  345. ASSERT_EMPTY_TAIL(outputa);
  346. ASSERT_EMPTY_TAIL(outputb);
  347. ASSERT_CLOSE_ERROR(outputa,outputb,ABS_ERROR_LDLT,REL_ERROR_LDLT);
  348. }
  349. void UnaryTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
  350. {
  351. (void)params;
  352. switch(id)
  353. {
  354. case TEST_MAT_SUB_F64_2:
  355. input1.reload(UnaryTestsF64::INPUTS1_F64_ID,mgr);
  356. input2.reload(UnaryTestsF64::INPUTS2_F64_ID,mgr);
  357. dims.reload(UnaryTestsF64::DIMSUNARY1_S16_ID,mgr);
  358. ref.reload(UnaryTestsF64::REFSUB1_F64_ID,mgr);
  359. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  360. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  361. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  362. break;
  363. case TEST_MAT_TRANS_F64_4:
  364. input1.reload(UnaryTestsF64::INPUTS1_F64_ID,mgr);
  365. dims.reload(UnaryTestsF64::DIMSUNARY1_S16_ID,mgr);
  366. ref.reload(UnaryTestsF64::REFTRANS1_F64_ID,mgr);
  367. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  368. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  369. break;
  370. case TEST_MAT_INVERSE_F64_5:
  371. input1.reload(UnaryTestsF64::INPUTSINV_F64_ID,mgr);
  372. dims.reload(UnaryTestsF64::DIMSINVERT1_S16_ID,mgr);
  373. ref.reload(UnaryTestsF64::REFINV1_F64_ID,mgr);
  374. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  375. a.create(ref.nbSamples(),UnaryTestsF64::TMPA_F64_ID,mgr);
  376. break;
  377. case TEST_MAT_CHOLESKY_DPO_F64_6:
  378. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_DPO_F64_ID,mgr);
  379. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  380. ref.reload(UnaryTestsF64::REFCHOLESKY1_DPO_F64_ID,mgr);
  381. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  382. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  383. break;
  384. case TEST_SOLVE_UPPER_TRIANGULAR_F64_7:
  385. input1.reload(UnaryTestsF64::INPUT_UT_DPO_F64_ID,mgr);
  386. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  387. input2.reload(UnaryTestsF64::INPUT_RNDA_DPO_F64_ID,mgr);
  388. ref.reload(UnaryTestsF64::REF_UTINV_DPO_F64_ID,mgr);
  389. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  390. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  391. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  392. break;
  393. case TEST_SOLVE_LOWER_TRIANGULAR_F64_8:
  394. input1.reload(UnaryTestsF64::INPUT_LT_DPO_F64_ID,mgr);
  395. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  396. input2.reload(UnaryTestsF64::INPUT_RNDA_DPO_F64_ID,mgr);
  397. ref.reload(UnaryTestsF64::REF_LTINV_DPO_F64_ID,mgr);
  398. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  399. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  400. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  401. break;
  402. case TEST_MAT_LDL_F64_9:
  403. // Definite positive test
  404. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_DPO_F64_ID,mgr);
  405. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  406. outputll.create(input1.nbSamples(),UnaryTestsF64::LL_F64_ID,mgr);
  407. outputd.create(input1.nbSamples(),UnaryTestsF64::D_F64_ID,mgr);
  408. outputp.create(input1.nbSamples(),UnaryTestsF64::PERM_S16_ID,mgr);
  409. outputa.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  410. outputb.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  411. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  412. tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDB_F64_ID,mgr);
  413. tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDC_F64_ID,mgr);
  414. tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDD_F64_ID,mgr);
  415. break;
  416. case TEST_MAT_LDL_F64_10:
  417. // Semi definite positive test
  418. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_SDPO_F64_ID,mgr);
  419. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_SDPO_S16_ID,mgr);
  420. outputll.create(input1.nbSamples(),UnaryTestsF64::LL_F64_ID,mgr);
  421. outputd.create(input1.nbSamples(),UnaryTestsF64::D_F64_ID,mgr);
  422. outputp.create(input1.nbSamples(),UnaryTestsF64::PERM_S16_ID,mgr);
  423. outputa.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  424. outputb.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  425. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  426. tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDB_F64_ID,mgr);
  427. tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDC_F64_ID,mgr);
  428. tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDD_F64_ID,mgr);
  429. break;
  430. }
  431. }
  432. void UnaryTestsF64::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
  433. {
  434. (void)id;
  435. //output.dump(mgr);
  436. (void)mgr;
  437. }