matrix_utils.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /******************************************************************************
  2. * @file matrix_utils.h
  3. * @brief Public header file for CMSIS DSP Library
  4. * @version V1.11.0
  5. * @date 30 May 2022
  6. * Target Processor: Cortex-M and Cortex-A cores
  7. ******************************************************************************/
  8. /*
  9. * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved.
  10. *
  11. * SPDX-License-Identifier: Apache-2.0
  12. *
  13. * Licensed under the Apache License, Version 2.0 (the License); you may
  14. * not use this file except in compliance with the License.
  15. * You may obtain a copy of the License at
  16. *
  17. * www.apache.org/licenses/LICENSE-2.0
  18. *
  19. * Unless required by applicable law or agreed to in writing, software
  20. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  21. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  22. * See the License for the specific language governing permissions and
  23. * limitations under the License.
  24. */
  25. #ifndef _MATRIX_UTILS_H_
  26. #define _MATRIX_UTILS_H_
  27. #include "arm_math_types.h"
  28. #include "arm_math_memory.h"
  29. #include "dsp/none.h"
  30. #include "dsp/utils.h"
  31. #ifdef __cplusplus
  32. extern "C"
  33. {
  34. #endif
  35. #define ELEM(A,ROW,COL) &((A)->pData[(A)->numCols* (ROW) + (COL)])
  36. #define SCALE_COL_T(T,CAST,A,ROW,v,i) \
  37. { \
  38. int32_t _w; \
  39. T *data = (A)->pData; \
  40. const int32_t _numCols = (A)->numCols; \
  41. const int32_t nb = (A)->numRows - ROW;\
  42. \
  43. data += i + _numCols * (ROW); \
  44. \
  45. for(_w=0;_w < nb; _w++) \
  46. { \
  47. *data *= CAST v; \
  48. data += _numCols; \
  49. } \
  50. }
  51. #define COPY_COL_T(T,A,ROW,COL,DST) \
  52. { \
  53. uint32_t _row; \
  54. T *_pb=DST; \
  55. T *_pa = (A)->pData + ROW * (A)->numCols + COL;\
  56. for(_row = ROW; _row < (A)->numRows; _row ++) \
  57. { \
  58. *_pb++ = *_pa; \
  59. _pa += (A)->numCols; \
  60. } \
  61. }
  62. #if defined(ARM_FLOAT16_SUPPORTED)
  63. #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
  64. #define SWAP_ROWS_F16(A,COL,i,j) \
  65. { \
  66. int cnt = ((A)->numCols)-(COL); \
  67. int32_t _w; \
  68. float16_t *data = (A)->pData; \
  69. const int32_t _numCols = (A)->numCols; \
  70. \
  71. for(_w=(COL);_w < _numCols; _w+=8) \
  72. { \
  73. f16x8_t tmpa,tmpb; \
  74. mve_pred16_t p0 = vctp16q(cnt); \
  75. \
  76. tmpa=vldrhq_z_f16(&data[i*_numCols + _w],p0);\
  77. tmpb=vldrhq_z_f16(&data[j*_numCols + _w],p0);\
  78. \
  79. vstrhq_p(&data[i*_numCols + _w], tmpb, p0); \
  80. vstrhq_p(&data[j*_numCols + _w], tmpa, p0); \
  81. \
  82. cnt -= 8; \
  83. } \
  84. }
  85. #define SCALE_ROW_F16(A,COL,v,i) \
  86. { \
  87. int cnt = ((A)->numCols)-(COL); \
  88. int32_t _w; \
  89. float16_t *data = (A)->pData; \
  90. const int32_t _numCols = (A)->numCols; \
  91. \
  92. for(_w=(COL);_w < _numCols; _w+=8) \
  93. { \
  94. f16x8_t tmpa; \
  95. mve_pred16_t p0 = vctp16q(cnt); \
  96. tmpa = vldrhq_z_f16(&data[i*_numCols + _w],p0);\
  97. tmpa = vmulq_n_f16(tmpa,(_Float16)v); \
  98. vstrhq_p(&data[i*_numCols + _w], tmpa, p0); \
  99. cnt -= 8; \
  100. } \
  101. \
  102. }
  103. #define MAC_ROW_F16(COL,A,i,v,B,j) \
  104. { \
  105. int cnt = ((A)->numCols)-(COL); \
  106. int32_t _w; \
  107. float16_t *dataA = (A)->pData; \
  108. float16_t *dataB = (B)->pData; \
  109. const int32_t _numCols = (A)->numCols; \
  110. \
  111. for(_w=(COL);_w < _numCols; _w+=8) \
  112. { \
  113. f16x8_t tmpa,tmpb; \
  114. mve_pred16_t p0 = vctp16q(cnt); \
  115. tmpa = vldrhq_z_f16(&dataA[i*_numCols + _w],p0);\
  116. tmpb = vldrhq_z_f16(&dataB[j*_numCols + _w],p0);\
  117. tmpa = vfmaq_n_f16(tmpa,tmpb,v); \
  118. vstrhq_p(&dataA[i*_numCols + _w], tmpa, p0); \
  119. cnt -= 8; \
  120. } \
  121. \
  122. }
  123. #define MAS_ROW_F16(COL,A,i,v,B,j) \
  124. { \
  125. int cnt = ((A)->numCols)-(COL); \
  126. int32_t _w; \
  127. float16_t *dataA = (A)->pData; \
  128. float16_t *dataB = (B)->pData; \
  129. const int32_t _numCols = (A)->numCols; \
  130. f16x8_t vec=vdupq_n_f16(v); \
  131. \
  132. for(_w=(COL);_w < _numCols; _w+=8) \
  133. { \
  134. f16x8_t tmpa,tmpb; \
  135. mve_pred16_t p0 = vctp16q(cnt); \
  136. tmpa = vldrhq_z_f16(&dataA[i*_numCols + _w],p0);\
  137. tmpb = vldrhq_z_f16(&dataB[j*_numCols + _w],p0);\
  138. tmpa = vfmsq_f16(tmpa,tmpb,vec); \
  139. vstrhq_p(&dataA[i*_numCols + _w], tmpa, p0); \
  140. cnt -= 8; \
  141. } \
  142. \
  143. }
  144. #else
  145. #define SWAP_ROWS_F16(A,COL,i,j) \
  146. { \
  147. int32_t _w; \
  148. float16_t *dataI = (A)->pData; \
  149. float16_t *dataJ = (A)->pData; \
  150. const int32_t _numCols = (A)->numCols;\
  151. const int32_t nb = _numCols-(COL); \
  152. \
  153. dataI += i*_numCols + (COL); \
  154. dataJ += j*_numCols + (COL); \
  155. \
  156. for(_w=0;_w < nb; _w++) \
  157. { \
  158. float16_t tmp; \
  159. tmp = *dataI; \
  160. *dataI++ = *dataJ; \
  161. *dataJ++ = tmp; \
  162. } \
  163. }
  164. #define SCALE_ROW_F16(A,COL,v,i) \
  165. { \
  166. int32_t _w; \
  167. float16_t *data = (A)->pData; \
  168. const int32_t _numCols = (A)->numCols;\
  169. const int32_t nb = _numCols-(COL); \
  170. \
  171. data += i*_numCols + (COL); \
  172. \
  173. for(_w=0;_w < nb; _w++) \
  174. { \
  175. *data++ *= (_Float16)v; \
  176. } \
  177. }
  178. #define MAC_ROW_F16(COL,A,i,v,B,j) \
  179. { \
  180. int32_t _w; \
  181. float16_t *dataA = (A)->pData; \
  182. float16_t *dataB = (B)->pData; \
  183. const int32_t _numCols = (A)->numCols; \
  184. const int32_t nb = _numCols-(COL); \
  185. \
  186. dataA += i*_numCols + (COL); \
  187. dataB += j*_numCols + (COL); \
  188. \
  189. for(_w=0;_w < nb; _w++) \
  190. { \
  191. *dataA++ += (_Float16)v * (_Float16)*dataB++;\
  192. } \
  193. }
  194. #define MAS_ROW_F16(COL,A,i,v,B,j) \
  195. { \
  196. int32_t _w; \
  197. float16_t *dataA = (A)->pData; \
  198. float16_t *dataB = (B)->pData; \
  199. const int32_t _numCols = (A)->numCols; \
  200. const int32_t nb = _numCols-(COL); \
  201. \
  202. dataA += i*_numCols + (COL); \
  203. dataB += j*_numCols + (COL); \
  204. \
  205. for(_w=0;_w < nb; _w++) \
  206. { \
  207. *dataA++ -= (_Float16)v * (_Float16)*dataB++;\
  208. } \
  209. }
  210. #endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/
  211. /* Functions with only a scalar version */
  212. #define COPY_COL_F16(A,ROW,COL,DST) \
  213. COPY_COL_T(float16_t,A,ROW,COL,DST)
  214. #define SCALE_COL_F16(A,ROW,v,i) \
  215. SCALE_COL_T(float16_t,(_Float16),A,ROW,v,i)
  216. #endif /* defined(ARM_FLOAT16_SUPPORTED)*/
  217. #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
  218. #define SWAP_ROWS_F32(A,COL,i,j) \
  219. { \
  220. int cnt = ((A)->numCols)-(COL); \
  221. float32_t *data = (A)->pData; \
  222. const int32_t _numCols = (A)->numCols; \
  223. int32_t _w; \
  224. \
  225. for(_w=(COL);_w < _numCols; _w+=4) \
  226. { \
  227. f32x4_t tmpa,tmpb; \
  228. mve_pred16_t p0 = vctp32q(cnt); \
  229. \
  230. tmpa=vldrwq_z_f32(&data[i*_numCols + _w],p0);\
  231. tmpb=vldrwq_z_f32(&data[j*_numCols + _w],p0);\
  232. \
  233. vstrwq_p(&data[i*_numCols + _w], tmpb, p0); \
  234. vstrwq_p(&data[j*_numCols + _w], tmpa, p0); \
  235. \
  236. cnt -= 4; \
  237. } \
  238. }
  239. #define MAC_ROW_F32(COL,A,i,v,B,j) \
  240. { \
  241. int cnt = ((A)->numCols)-(COL); \
  242. float32_t *dataA = (A)->pData; \
  243. float32_t *dataB = (B)->pData; \
  244. const int32_t _numCols = (A)->numCols; \
  245. int32_t _w; \
  246. \
  247. for(_w=(COL);_w < _numCols; _w+=4) \
  248. { \
  249. f32x4_t tmpa,tmpb; \
  250. mve_pred16_t p0 = vctp32q(cnt); \
  251. tmpa = vldrwq_z_f32(&dataA[i*_numCols + _w],p0);\
  252. tmpb = vldrwq_z_f32(&dataB[j*_numCols + _w],p0);\
  253. tmpa = vfmaq_n_f32(tmpa,tmpb,v); \
  254. vstrwq_p(&dataA[i*_numCols + _w], tmpa, p0); \
  255. cnt -= 4; \
  256. } \
  257. \
  258. }
  259. #define MAS_ROW_F32(COL,A,i,v,B,j) \
  260. { \
  261. int cnt = ((A)->numCols)-(COL); \
  262. float32_t *dataA = (A)->pData; \
  263. float32_t *dataB = (B)->pData; \
  264. const int32_t _numCols = (A)->numCols; \
  265. int32_t _w; \
  266. f32x4_t vec=vdupq_n_f32(v); \
  267. \
  268. for(_w=(COL);_w < _numCols; _w+=4) \
  269. { \
  270. f32x4_t tmpa,tmpb; \
  271. mve_pred16_t p0 = vctp32q(cnt); \
  272. tmpa = vldrwq_z_f32(&dataA[i*_numCols + _w],p0);\
  273. tmpb = vldrwq_z_f32(&dataB[j*_numCols + _w],p0);\
  274. tmpa = vfmsq_f32(tmpa,tmpb,vec); \
  275. vstrwq_p(&dataA[i*_numCols + _w], tmpa, p0); \
  276. cnt -= 4; \
  277. } \
  278. \
  279. }
  280. #define SCALE_ROW_F32(A,COL,v,i) \
  281. { \
  282. int cnt = ((A)->numCols)-(COL); \
  283. float32_t *data = (A)->pData; \
  284. const int32_t _numCols = (A)->numCols; \
  285. int32_t _w; \
  286. \
  287. for(_w=(COL);_w < _numCols; _w+=4) \
  288. { \
  289. f32x4_t tmpa; \
  290. mve_pred16_t p0 = vctp32q(cnt); \
  291. tmpa = vldrwq_z_f32(&data[i*_numCols + _w],p0);\
  292. tmpa = vmulq_n_f32(tmpa,v); \
  293. vstrwq_p(&data[i*_numCols + _w], tmpa, p0); \
  294. cnt -= 4; \
  295. } \
  296. \
  297. }
  298. #elif defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
  299. #define SWAP_ROWS_F32(A,COL,i,j) \
  300. { \
  301. int32_t _w; \
  302. float32_t *dataI = (A)->pData; \
  303. float32_t *dataJ = (A)->pData; \
  304. const int32_t _numCols = (A)->numCols;\
  305. const int32_t nb = _numCols - COL; \
  306. \
  307. dataI += i*_numCols + (COL); \
  308. dataJ += j*_numCols + (COL); \
  309. \
  310. float32_t tmp; \
  311. \
  312. for(_w=0;_w < nb; _w++) \
  313. { \
  314. tmp = *dataI; \
  315. *dataI++ = *dataJ; \
  316. *dataJ++ = tmp; \
  317. } \
  318. }
  319. #define MAC_ROW_F32(COL,A,i,v,B,j) \
  320. { \
  321. float32_t *dataA = (A)->pData; \
  322. float32_t *dataB = (B)->pData; \
  323. const int32_t _numCols = (A)->numCols;\
  324. const int32_t nb = _numCols - (COL); \
  325. int32_t nbElems; \
  326. f32x4_t vec = vdupq_n_f32(v); \
  327. \
  328. nbElems = nb >> 2; \
  329. \
  330. dataA += i*_numCols + (COL); \
  331. dataB += j*_numCols + (COL); \
  332. \
  333. while(nbElems>0) \
  334. { \
  335. f32x4_t tmpa,tmpb; \
  336. tmpa = vld1q_f32(dataA,p0); \
  337. tmpb = vld1q_f32(dataB,p0); \
  338. tmpa = vmlaq_f32(tmpa,tmpb,vec);\
  339. vst1q_f32(dataA, tmpa, p0); \
  340. nbElems--; \
  341. dataA += 4; \
  342. dataB += 4; \
  343. } \
  344. \
  345. nbElems = nb & 3; \
  346. while(nbElems > 0) \
  347. { \
  348. *dataA++ += v* *dataB++; \
  349. nbElems--; \
  350. } \
  351. }
  352. #define MAS_ROW_F32(COL,A,i,v,B,j) \
  353. { \
  354. float32_t *dataA = (A)->pData; \
  355. float32_t *dataB = (B)->pData; \
  356. const int32_t _numCols = (A)->numCols;\
  357. const int32_t nb = _numCols - (COL); \
  358. int32_t nbElems; \
  359. f32x4_t vec = vdupq_n_f32(v); \
  360. \
  361. nbElems = nb >> 2; \
  362. \
  363. dataA += i*_numCols + (COL); \
  364. dataB += j*_numCols + (COL); \
  365. \
  366. while(nbElems>0) \
  367. { \
  368. f32x4_t tmpa,tmpb; \
  369. tmpa = vld1q_f32(dataA); \
  370. tmpb = vld1q_f32(dataB); \
  371. tmpa = vmlsq_f32(tmpa,tmpb,vec);\
  372. vst1q_f32(dataA, tmpa); \
  373. nbElems--; \
  374. dataA += 4; \
  375. dataB += 4; \
  376. } \
  377. \
  378. nbElems = nb & 3; \
  379. while(nbElems > 0) \
  380. { \
  381. *dataA++ -= v* *dataB++; \
  382. nbElems--; \
  383. } \
  384. }
  385. #define SCALE_ROW_F32(A,COL,v,i) \
  386. { \
  387. float32_t *data = (A)->pData; \
  388. const int32_t _numCols = (A)->numCols; \
  389. const int32_t nb = _numCols - (COL); \
  390. int32_t nbElems; \
  391. f32x4_t vec = vdupq_n_f32(v); \
  392. \
  393. nbElems = nb >> 2; \
  394. \
  395. data += i*_numCols + (COL); \
  396. while(nbElems>0) \
  397. { \
  398. f32x4_t tmpa; \
  399. tmpa = vld1q_f32(data); \
  400. tmpa = vmulq_f32(tmpa,vec); \
  401. vst1q_f32(data, tmpa); \
  402. data += 4; \
  403. nbElems --; \
  404. } \
  405. \
  406. nbElems = nb & 3; \
  407. while(nbElems > 0) \
  408. { \
  409. *data++ *= v; \
  410. nbElems--; \
  411. } \
  412. \
  413. }
  414. #else
  415. #define SWAP_ROWS_F32(A,COL,i,j) \
  416. { \
  417. int32_t _w; \
  418. float32_t tmp; \
  419. float32_t *dataI = (A)->pData; \
  420. float32_t *dataJ = (A)->pData; \
  421. const int32_t _numCols = (A)->numCols;\
  422. const int32_t nb = _numCols - COL; \
  423. \
  424. dataI += i*_numCols + (COL); \
  425. dataJ += j*_numCols + (COL); \
  426. \
  427. \
  428. for(_w=0;_w < nb; _w++) \
  429. { \
  430. tmp = *dataI; \
  431. *dataI++ = *dataJ; \
  432. *dataJ++ = tmp; \
  433. } \
  434. }
  435. #define SCALE_ROW_F32(A,COL,v,i) \
  436. { \
  437. int32_t _w; \
  438. float32_t *data = (A)->pData; \
  439. const int32_t _numCols = (A)->numCols;\
  440. const int32_t nb = _numCols - COL; \
  441. \
  442. data += i*_numCols + (COL); \
  443. \
  444. for(_w=0;_w < nb; _w++) \
  445. { \
  446. *data++ *= v; \
  447. } \
  448. }
  449. #define MAC_ROW_F32(COL,A,i,v,B,j) \
  450. { \
  451. int32_t _w; \
  452. float32_t *dataA = (A)->pData; \
  453. float32_t *dataB = (B)->pData; \
  454. const int32_t _numCols = (A)->numCols;\
  455. const int32_t nb = _numCols-(COL); \
  456. \
  457. dataA = dataA + i*_numCols + (COL); \
  458. dataB = dataB + j*_numCols + (COL); \
  459. \
  460. for(_w=0;_w < nb; _w++) \
  461. { \
  462. *dataA++ += v* *dataB++; \
  463. } \
  464. }
  465. #define MAS_ROW_F32(COL,A,i,v,B,j) \
  466. { \
  467. int32_t _w; \
  468. float32_t *dataA = (A)->pData; \
  469. float32_t *dataB = (B)->pData; \
  470. const int32_t _numCols = (A)->numCols;\
  471. const int32_t nb = _numCols-(COL); \
  472. \
  473. dataA = dataA + i*_numCols + (COL); \
  474. dataB = dataB + j*_numCols + (COL); \
  475. \
  476. for(_w=0;_w < nb; _w++) \
  477. { \
  478. *dataA++ -= v* *dataB++; \
  479. } \
  480. }
  481. #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/* Functions with only a scalar version */
  483. #define COPY_COL_F32(A,ROW,COL,DST) \
  484. COPY_COL_T(float32_t,A,ROW,COL,DST)
  485. #define COPY_COL_F64(A,ROW,COL,DST) \
  486. COPY_COL_T(float64_t,A,ROW,COL,DST)
  487. #define SWAP_COLS_F32(A,COL,i,j) \
  488. { \
  489. int32_t _w; \
  490. float32_t *data = (A)->pData; \
  491. const int32_t _numCols = (A)->numCols; \
  492. for(_w=(COL);_w < _numCols; _w++) \
  493. { \
  494. float32_t tmp; \
  495. tmp = data[_w*_numCols + i]; \
  496. data[_w*_numCols + i] = data[_w*_numCols + j];\
  497. data[_w*_numCols + j] = tmp; \
  498. } \
  499. }
  500. #define SCALE_COL_F32(A,ROW,v,i) \
  501. SCALE_COL_T(float32_t,,A,ROW,v,i)
  502. #define SWAP_ROWS_F64(A,COL,i,j) \
  503. { \
  504. int32_t _w; \
  505. float64_t *dataI = (A)->pData; \
  506. float64_t *dataJ = (A)->pData; \
  507. const int32_t _numCols = (A)->numCols;\
  508. const int32_t nb = _numCols-(COL); \
  509. \
  510. dataI += i*_numCols + (COL); \
  511. dataJ += j*_numCols + (COL); \
  512. \
  513. for(_w=0;_w < nb; _w++) \
  514. { \
  515. float64_t tmp; \
  516. tmp = *dataI; \
  517. *dataI++ = *dataJ; \
  518. *dataJ++ = tmp; \
  519. } \
  520. }
  521. #define SWAP_COLS_F64(A,COL,i,j) \
  522. { \
  523. int32_t _w; \
  524. float64_t *data = (A)->pData; \
  525. const int32_t _numCols = (A)->numCols; \
  526. for(_w=(COL);_w < _numCols; _w++) \
  527. { \
  528. float64_t tmp; \
  529. tmp = data[_w*_numCols + i]; \
  530. data[_w*_numCols + i] = data[_w*_numCols + j];\
  531. data[_w*_numCols + j] = tmp; \
  532. } \
  533. }
  534. #define SCALE_ROW_F64(A,COL,v,i) \
  535. { \
  536. int32_t _w; \
  537. float64_t *data = (A)->pData; \
  538. const int32_t _numCols = (A)->numCols;\
  539. const int32_t nb = _numCols-(COL); \
  540. \
  541. data += i*_numCols + (COL); \
  542. \
  543. for(_w=0;_w < nb; _w++) \
  544. { \
  545. *data++ *= v; \
  546. } \
  547. }
  548. #define SCALE_COL_F64(A,ROW,v,i) \
  549. SCALE_COL_T(float64_t,,A,ROW,v,i)
  550. #define MAC_ROW_F64(COL,A,i,v,B,j) \
  551. { \
  552. int32_t _w; \
  553. float64_t *dataA = (A)->pData; \
  554. float64_t *dataB = (B)->pData; \
  555. const int32_t _numCols = (A)->numCols;\
  556. const int32_t nb = _numCols-(COL); \
  557. \
  558. dataA += i*_numCols + (COL); \
  559. dataB += j*_numCols + (COL); \
  560. \
  561. for(_w=0;_w < nb; _w++) \
  562. { \
  563. *dataA++ += v* *dataB++; \
  564. } \
  565. }
  566. #define MAS_ROW_F64(COL,A,i,v,B,j) \
  567. { \
  568. int32_t _w; \
  569. float64_t *dataA = (A)->pData; \
  570. float64_t *dataB = (B)->pData; \
  571. const int32_t _numCols = (A)->numCols;\
  572. const int32_t nb = _numCols-(COL); \
  573. \
  574. dataA += i*_numCols + (COL); \
  575. dataB += j*_numCols + (COL); \
  576. \
  577. for(_w=0;_w < nb; _w++) \
  578. { \
  579. *dataA++ -= v* *dataB++; \
  580. } \
  581. }
  582. #ifdef __cplusplus
  583. }
  584. #endif
  585. #endif /* ifndef _MATRIX_UTILS_H_ */