UnaryTestsF64.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629
  1. #include "UnaryTestsF64.h"
  2. #include "Error.h"
  3. #define SNR_THRESHOLD 120
  4. /*
  5. Reference patterns are generated with
  6. a double precision computation.
  7. */
  8. #define REL_ERROR (1.0e-6)
  9. #define ABS_ERROR (1.0e-5)
  10. /*
  11. Comparison for Cholesky
  12. */
  13. #define SNR_THRESHOLD_CHOL 270
  14. #define REL_ERROR_CHOL (1.0e-9)
  15. #define ABS_ERROR_CHOL (1.0e-9)
  16. /* LDLT comparison */
  17. #define REL_ERROR_LDLT (1e-5)
  18. #define ABS_ERROR_LDLT (1e-5)
  19. /* Upper bound of maximum matrix dimension used by Python */
  20. #define MAXMATRIXDIM 40
  21. #define LOADDATA2() \
  22. const float64_t *inp1=input1.ptr(); \
  23. const float64_t *inp2=input2.ptr(); \
  24. \
  25. float64_t *ap=a.ptr(); \
  26. float64_t *bp=b.ptr(); \
  27. \
  28. float64_t *outp=output.ptr(); \
  29. int16_t *dimsp = dims.ptr(); \
  30. int nbMatrixes = dims.nbSamples() >> 1;\
  31. int rows,columns; \
  32. int i;
  33. #define LOADDATA1() \
  34. const float64_t *inp1=input1.ptr(); \
  35. \
  36. float64_t *ap=a.ptr(); \
  37. \
  38. float64_t *outp=output.ptr(); \
  39. int16_t *dimsp = dims.ptr(); \
  40. int nbMatrixes = dims.nbSamples() >> 1;\
  41. int rows,columns; \
  42. int i;
  43. #define PREPAREDATA2() \
  44. in1.numRows=rows; \
  45. in1.numCols=columns; \
  46. memcpy((void*)ap,(const void*)inp1,sizeof(float64_t)*rows*columns);\
  47. in1.pData = ap; \
  48. \
  49. in2.numRows=rows; \
  50. in2.numCols=columns; \
  51. memcpy((void*)bp,(const void*)inp2,sizeof(float64_t)*rows*columns);\
  52. in2.pData = bp; \
  53. \
  54. out.numRows=rows; \
  55. out.numCols=columns; \
  56. out.pData = outp;
  57. #define PREPAREDATALT() \
  58. in1.numRows=rows; \
  59. in1.numCols=rows; \
  60. memcpy((void*)ap,(const void*)inp1,sizeof(float64_t)*rows*rows); \
  61. in1.pData = ap; \
  62. \
  63. in2.numRows=rows; \
  64. in2.numCols=columns; \
  65. memcpy((void*)bp,(const void*)inp2,sizeof(float64_t)*rows*columns);\
  66. in2.pData = bp; \
  67. \
  68. out.numRows=rows; \
  69. out.numCols=columns; \
  70. out.pData = outp;
  71. #define PREPAREDATA1(TRANSPOSED) \
  72. in1.numRows=rows; \
  73. in1.numCols=columns; \
  74. memcpy((void*)ap,(const void*)inp1,sizeof(float64_t)*rows*columns);\
  75. in1.pData = ap; \
  76. \
  77. if (TRANSPOSED) \
  78. { \
  79. out.numRows=columns; \
  80. out.numCols=rows; \
  81. } \
  82. else \
  83. { \
  84. out.numRows=rows; \
  85. out.numCols=columns; \
  86. } \
  87. out.pData = outp;
  88. #define PREPAREDATALL1() \
  89. in1.numRows=rows; \
  90. in1.numCols=columns; \
  91. memcpy((void*)ap,(const void*)inp1,sizeof(float64_t)*rows*columns);\
  92. in1.pData = ap; \
  93. \
  94. outll.numRows=rows; \
  95. outll.numCols=columns; \
  96. \
  97. outll.pData = outllp;
  98. #define SWAP_ROWS(A,i,j) \
  99. for(int w=0;w < n; w++) \
  100. { \
  101. float64_t tmp; \
  102. tmp = A[i*n + w]; \
  103. A[i*n + w] = A[j*n + w];\
  104. A[j*n + w] = tmp; \
  105. }
  106. void UnaryTestsF64::test_mat_add_f64()
  107. {
  108. }
  109. void UnaryTestsF64::test_mat_sub_f64()
  110. {
  111. LOADDATA2();
  112. arm_status status;
  113. for(i=0;i < nbMatrixes ; i ++)
  114. {
  115. rows = *dimsp++;
  116. columns = *dimsp++;
  117. PREPAREDATA2();
  118. status=arm_mat_sub_f64(&this->in1,&this->in2,&this->out);
  119. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  120. outp += (rows * columns);
  121. }
  122. ASSERT_EMPTY_TAIL(output);
  123. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  124. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  125. }
  126. void UnaryTestsF64::test_mat_scale_f64()
  127. {
  128. }
  129. void UnaryTestsF64::test_mat_trans_f64()
  130. {
  131. LOADDATA1();
  132. arm_status status;
  133. for(i=0;i < nbMatrixes ; i ++)
  134. {
  135. rows = *dimsp++;
  136. columns = *dimsp++;
  137. PREPAREDATA1(true);
  138. status=arm_mat_trans_f64(&this->in1,&this->out);
  139. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  140. outp += (rows * columns);
  141. }
  142. ASSERT_EMPTY_TAIL(output);
  143. ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
  144. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  145. }
  146. /*
  147. Test framework is only adding 16 bytes of free memory after the end of a buffer.
  148. So, we limit to 2 float64 for checking out of buffer write.
  149. */
  150. static void refInnerTail(float64_t *b)
  151. {
  152. b[0] = 1.0;
  153. b[1] = -2.0;
  154. }
  155. static void checkInnerTail(float64_t *b)
  156. {
  157. ASSERT_TRUE(b[0] == 1.0);
  158. ASSERT_TRUE(b[1] == -2.0);
  159. }
  160. void UnaryTestsF64::test_mat_inverse_f64()
  161. {
  162. const float64_t *inp1=input1.ptr();
  163. float64_t *ap=a.ptr();
  164. float64_t *outp=output.ptr();
  165. int16_t *dimsp = dims.ptr();
  166. int nbMatrixes = dims.nbSamples();
  167. int rows,columns;
  168. int i;
  169. arm_status status;
  170. for(i=0;i < nbMatrixes ; i ++)
  171. {
  172. rows = *dimsp++;
  173. columns = rows;
  174. PREPAREDATA1(false);
  175. refInnerTail(outp+(rows * columns));
  176. status=arm_mat_inverse_f64(&this->in1,&this->out);
  177. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  178. outp += (rows * columns);
  179. inp1 += (rows * columns);
  180. checkInnerTail(outp);
  181. }
  182. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  183. ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
  184. }
  185. void UnaryTestsF64::test_mat_cholesky_dpo_f64()
  186. {
  187. float64_t *ap=a.ptr();
  188. const float64_t *inp1=input1.ptr();
  189. float64_t *outp=output.ptr();
  190. int16_t *dimsp = dims.ptr();
  191. int nbMatrixes = dims.nbSamples();
  192. int rows,columns;
  193. int i;
  194. arm_status status;
  195. for(i=0;i < nbMatrixes ; i ++)
  196. {
  197. rows = *dimsp++;
  198. columns = rows;
  199. PREPAREDATA1(false);
  200. status=arm_mat_cholesky_f64(&this->in1,&this->out);
  201. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  202. outp += (rows * columns);
  203. inp1 += (rows * columns);
  204. }
  205. ASSERT_EMPTY_TAIL(output);
  206. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD_CHOL);
  207. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR_CHOL,REL_ERROR_CHOL);
  208. }
  209. void UnaryTestsF64::test_solve_upper_triangular_f64()
  210. {
  211. float64_t *ap=a.ptr();
  212. const float64_t *inp1=input1.ptr();
  213. float64_t *bp=b.ptr();
  214. const float64_t *inp2=input2.ptr();
  215. float64_t *outp=output.ptr();
  216. int16_t *dimsp = dims.ptr();
  217. int nbMatrixes = dims.nbSamples()>>1;
  218. int rows,columns;
  219. int i;
  220. arm_status status;
  221. for(i=0;i < nbMatrixes ; i ++)
  222. {
  223. rows = *dimsp++;
  224. columns = *dimsp++;
  225. PREPAREDATALT();
  226. status=arm_mat_solve_upper_triangular_f64(&this->in1,&this->in2,&this->out);
  227. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  228. outp += (rows * columns);
  229. inp1 += (rows * rows);
  230. inp2 += (rows * columns);
  231. }
  232. ASSERT_EMPTY_TAIL(output);
  233. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  234. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
  235. }
  236. void UnaryTestsF64::test_solve_lower_triangular_f64()
  237. {
  238. float64_t *ap=a.ptr();
  239. const float64_t *inp1=input1.ptr();
  240. float64_t *bp=b.ptr();
  241. const float64_t *inp2=input2.ptr();
  242. float64_t *outp=output.ptr();
  243. int16_t *dimsp = dims.ptr();
  244. int nbMatrixes = dims.nbSamples()>>1;
  245. int rows,columns;
  246. int i;
  247. arm_status status;
  248. for(i=0;i < nbMatrixes ; i ++)
  249. {
  250. rows = *dimsp++;
  251. columns = *dimsp++;
  252. PREPAREDATALT();
  253. status=arm_mat_solve_lower_triangular_f64(&this->in1,&this->in2,&this->out);
  254. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  255. outp += (rows * columns);
  256. inp1 += (rows * rows);
  257. inp2 += (rows * columns);
  258. }
  259. ASSERT_EMPTY_TAIL(output);
  260. ASSERT_SNR(output,ref,(float64_t)SNR_THRESHOLD);
  261. ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
  262. }
  263. static void trans_f64(const float64_t *src, float64_t *dst, int n)
  264. {
  265. for(int r=0; r<n ; r++)
  266. {
  267. for(int c=0; c<n ; c++)
  268. {
  269. dst[c*n+r] = src[r*n+c];
  270. }
  271. }
  272. }
  273. static void mult_f64_f64(const float64_t *srcA, const float64_t *srcB, float64_t *dst,int n)
  274. {
  275. for(int r=0; r<n ; r++)
  276. {
  277. for(int c=0; c<n ; c++)
  278. {
  279. float64_t sum=0.0;
  280. for(int k=0; k < n ; k++)
  281. {
  282. sum += srcA[r*n+k] * srcB[k*n+c];
  283. }
  284. dst[r*n+c] = sum;
  285. }
  286. }
  287. }
  288. void UnaryTestsF64::compute_ldlt_error(const int n,const int16_t *outpp)
  289. {
  290. float64_t *tmpa = tmpapat.ptr() ;
  291. float64_t *tmpb = tmpbpat.ptr() ;
  292. float64_t *tmpc = tmpcpat.ptr() ;
  293. /* Compute P A P^t */
  294. // Create identiy matrix
  295. for(int r=0; r < n; r++)
  296. {
  297. for(int c=0; c < n; c++)
  298. {
  299. if (r == c)
  300. {
  301. tmpa[r*n+c] = 1.0;
  302. }
  303. else
  304. {
  305. tmpa[r*n+c] = 0.0;
  306. }
  307. }
  308. }
  309. // Create permutation matrix
  310. for(int r=0;r < n; r++)
  311. {
  312. SWAP_ROWS(tmpa,r,outpp[r]);
  313. }
  314. trans_f64((const float64_t*)tmpa,tmpb,n);
  315. mult_f64_f64((const float64_t*)this->in1.pData,(const float64_t*)tmpb,tmpc,n);
  316. mult_f64_f64((const float64_t*)tmpa,(const float64_t*)tmpc,outa,n);
  317. /* Compute L D L^t */
  318. trans_f64((const float64_t*)this->outll.pData,tmpc,n);
  319. mult_f64_f64((const float64_t*)this->outd.pData,(const float64_t*)tmpc,tmpa,n);
  320. mult_f64_f64((const float64_t*)this->outll.pData,(const float64_t*)tmpa,outb,n);
  321. }
  322. void UnaryTestsF64::test_mat_ldl_f64()
  323. {
  324. float64_t *ap=a.ptr();
  325. const float64_t *inp1=input1.ptr();
  326. float64_t *outllp=outputll.ptr();
  327. float64_t *outdp=outputd.ptr();
  328. int16_t *outpp=outputp.ptr();
  329. outa=outputa.ptr();
  330. outb=outputb.ptr();
  331. int16_t *dimsp = dims.ptr();
  332. int nbMatrixes = dims.nbSamples();
  333. int rows,columns;
  334. int i;
  335. arm_status status;
  336. for(i=0;i < nbMatrixes ; i ++)
  337. {
  338. rows = *dimsp++;
  339. columns = rows;
  340. PREPAREDATALL1();
  341. outd.numRows=rows;
  342. outd.numCols=columns;
  343. outd.pData=outdp;
  344. memset(outpp,0,rows*sizeof(uint16_t));
  345. memset(outdp,0,columns*rows*sizeof(float64_t));
  346. status=arm_mat_ldlt_f64(&this->in1,&this->outll,&this->outd,(uint16_t*)outpp);
  347. ASSERT_TRUE(status==ARM_MATH_SUCCESS);
  348. compute_ldlt_error(rows,outpp);
  349. outllp += (rows * columns);
  350. outdp += (rows * columns);
  351. outpp += rows;
  352. outa += (rows * columns);
  353. outb +=(rows * columns);
  354. inp1 += (rows * columns);
  355. }
  356. ASSERT_EMPTY_TAIL(outputll);
  357. ASSERT_EMPTY_TAIL(outputd);
  358. ASSERT_EMPTY_TAIL(outputp);
  359. ASSERT_EMPTY_TAIL(outputa);
  360. ASSERT_EMPTY_TAIL(outputb);
  361. ASSERT_CLOSE_ERROR(outputa,outputb,ABS_ERROR_LDLT,REL_ERROR_LDLT);
  362. }
  363. void UnaryTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
  364. {
  365. (void)params;
  366. switch(id)
  367. {
  368. case TEST_MAT_SUB_F64_2:
  369. input1.reload(UnaryTestsF64::INPUTS1_F64_ID,mgr);
  370. input2.reload(UnaryTestsF64::INPUTS2_F64_ID,mgr);
  371. dims.reload(UnaryTestsF64::DIMSUNARY1_S16_ID,mgr);
  372. ref.reload(UnaryTestsF64::REFSUB1_F64_ID,mgr);
  373. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  374. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  375. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  376. break;
  377. case TEST_MAT_TRANS_F64_4:
  378. input1.reload(UnaryTestsF64::INPUTS1_F64_ID,mgr);
  379. dims.reload(UnaryTestsF64::DIMSUNARY1_S16_ID,mgr);
  380. ref.reload(UnaryTestsF64::REFTRANS1_F64_ID,mgr);
  381. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  382. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  383. break;
  384. case TEST_MAT_INVERSE_F64_5:
  385. input1.reload(UnaryTestsF64::INPUTSINV_F64_ID,mgr);
  386. dims.reload(UnaryTestsF64::DIMSINVERT1_S16_ID,mgr);
  387. ref.reload(UnaryTestsF64::REFINV1_F64_ID,mgr);
  388. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  389. a.create(ref.nbSamples(),UnaryTestsF64::TMPA_F64_ID,mgr);
  390. break;
  391. case TEST_MAT_CHOLESKY_DPO_F64_6:
  392. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_DPO_F64_ID,mgr);
  393. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  394. ref.reload(UnaryTestsF64::REFCHOLESKY1_DPO_F64_ID,mgr);
  395. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  396. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  397. break;
  398. case TEST_SOLVE_UPPER_TRIANGULAR_F64_7:
  399. input1.reload(UnaryTestsF64::INPUT_MAT_UTSOLVE_F64_ID,mgr);
  400. input2.reload(UnaryTestsF64::INPUT_VEC_LTSOLVE_F64_ID,mgr);
  401. dims.reload(UnaryTestsF64::DIM_LTSOLVE_F64_ID,mgr);
  402. ref.reload(UnaryTestsF64::REF_UT_SOLVE_F64_ID,mgr);
  403. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  404. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  405. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  406. break;
  407. case TEST_SOLVE_LOWER_TRIANGULAR_F64_8:
  408. input1.reload(UnaryTestsF64::INPUT_MAT_LTSOLVE_F64_ID,mgr);
  409. input2.reload(UnaryTestsF64::INPUT_VEC_LTSOLVE_F64_ID,mgr);
  410. dims.reload(UnaryTestsF64::DIM_LTSOLVE_F64_ID,mgr);
  411. ref.reload(UnaryTestsF64::REF_LT_SOLVE_F64_ID,mgr);
  412. output.create(ref.nbSamples(),UnaryTestsF64::OUT_F64_ID,mgr);
  413. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  414. b.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPB_F64_ID,mgr);
  415. break;
  416. case TEST_MAT_LDL_F64_9:
  417. // Definite positive test
  418. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_DPO_F64_ID,mgr);
  419. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_DPO_S16_ID,mgr);
  420. outputll.create(input1.nbSamples(),UnaryTestsF64::LL_F64_ID,mgr);
  421. outputd.create(input1.nbSamples(),UnaryTestsF64::D_F64_ID,mgr);
  422. outputp.create(input1.nbSamples(),UnaryTestsF64::PERM_S16_ID,mgr);
  423. outputa.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  424. outputb.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  425. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  426. tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDB_F64_ID,mgr);
  427. tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDC_F64_ID,mgr);
  428. tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDD_F64_ID,mgr);
  429. break;
  430. case TEST_MAT_LDL_F64_10:
  431. // Semi definite positive test
  432. input1.reload(UnaryTestsF64::INPUTSCHOLESKY1_SDPO_F64_ID,mgr);
  433. dims.reload(UnaryTestsF64::DIMSCHOLESKY1_SDPO_S16_ID,mgr);
  434. outputll.create(input1.nbSamples(),UnaryTestsF64::LL_F64_ID,mgr);
  435. outputd.create(input1.nbSamples(),UnaryTestsF64::D_F64_ID,mgr);
  436. outputp.create(input1.nbSamples(),UnaryTestsF64::PERM_S16_ID,mgr);
  437. outputa.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  438. outputb.create(input1.nbSamples(),UnaryTestsF64::OUTA_F64_ID,mgr);
  439. a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPA_F64_ID,mgr);
  440. tmpapat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDB_F64_ID,mgr);
  441. tmpbpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDC_F64_ID,mgr);
  442. tmpcpat.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF64::TMPDD_F64_ID,mgr);
  443. break;
  444. }
  445. }
  446. void UnaryTestsF64::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
  447. {
  448. (void)id;
  449. //output.dump(mgr);
  450. (void)mgr;
  451. }