arm_mat_qr_f64.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_mat_qr_f64.c
  4. * Description: Double floating-point matrix QR decomposition.
  5. *
  6. * $Date: 15 June 2022
  7. * $Revision: V1.11.0
  8. *
  9. * Target Processor: Cortex-M and Cortex-A cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "dsp/matrix_functions.h"
  29. #include "dsp/matrix_utils.h"
  30. /**
  31. @ingroup groupMatrix
  32. */
  33. /**
  34. @addtogroup MatrixQR
  35. @{
  36. */
  37. /**
  38. @brief QR decomposition of a m x n double floating point matrix with m >= n.
  39. @param[in] pSrc points to input matrix structure. The source matrix is modified by the function.
  40. @param[in] threshold norm2 threshold.
  41. @param[out] pOutR points to output R matrix structure of dimension m x n
  42. @param[out] pOutQ points to output Q matrix structure of dimension m x m (can be NULL)
  43. @param[out] pOutTau points to Householder scaling factors of dimension n
  44. @param[inout] pTmpA points to a temporary vector of dimension m.
  45. @param[inout] pTmpB points to a temporary vector of dimension m.
  46. @return execution status
  47. - \ref ARM_MATH_SUCCESS : Operation successful
  48. - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
  49. @par pOutQ is optional:
  50. pOutQ can be a NULL pointer.
  51. In this case, the argument will be ignored
  52. and the output Q matrix won't be computed.
  53. @par Norm2 threshold
  54. For the meaning of this argument please
  55. refer to the \ref MatrixHouseholder documentation
  56. */
  57. arm_status arm_mat_qr_f64(
  58. const arm_matrix_instance_f64 * pSrc,
  59. const float64_t threshold,
  60. arm_matrix_instance_f64 * pOutR,
  61. arm_matrix_instance_f64 * pOutQ,
  62. float64_t * pOutTau,
  63. float64_t *pTmpA,
  64. float64_t *pTmpB
  65. )
  66. {
  67. int32_t col=0;
  68. int32_t nb,pos;
  69. float64_t *pa,*pc;
  70. float64_t beta;
  71. float64_t *pv;
  72. float64_t *pdst;
  73. float64_t *p;
  74. if (pSrc->numRows < pSrc->numCols)
  75. {
  76. return(ARM_MATH_SIZE_MISMATCH);
  77. }
  78. memcpy(pOutR->pData,pSrc->pData,pSrc->numCols * pSrc->numRows*sizeof(float64_t));
  79. pOutR->numCols = pSrc->numCols;
  80. pOutR->numRows = pSrc->numRows;
  81. p = pOutR->pData;
  82. pc = pOutTau;
  83. for(col=0 ; col < pSrc->numCols; col++)
  84. {
  85. int32_t i,j,k,blkCnt;
  86. float64_t *pa0,*pa1,*pa2,*pa3;
  87. COPY_COL_F64(pOutR,col,col,pTmpA);
  88. beta = arm_householder_f64(pTmpA,threshold,pSrc->numRows - col,pTmpA);
  89. *pc++ = beta;
  90. pdst = pTmpB;
  91. /* v.T A(col:,col:) -> tmpb */
  92. pv = pTmpA;
  93. pa = p;
  94. for(j=0;j<pSrc->numCols-col; j++)
  95. {
  96. *pdst++ = *pv * *pa++;
  97. }
  98. pa += col;
  99. pv++;
  100. pdst = pTmpB;
  101. pa0 = pa;
  102. pa1 = pa0 + pSrc->numCols;
  103. pa2 = pa1 + pSrc->numCols;
  104. pa3 = pa2 + pSrc->numCols;
  105. /* Unrolled loop */
  106. blkCnt = (pSrc->numRows-col - 1) >> 2;
  107. k=1;
  108. while(blkCnt > 0)
  109. {
  110. float64_t sum;
  111. for(j=0;j<pSrc->numCols-col; j++)
  112. {
  113. sum = *pdst;
  114. sum += pv[0] * *pa0++;
  115. sum += pv[1] * *pa1++;
  116. sum += pv[2] * *pa2++;
  117. sum += pv[3] * *pa3++;
  118. *pdst++ = sum;
  119. }
  120. pa0 += col + 3*pSrc->numCols;
  121. pa1 += col + 3*pSrc->numCols;
  122. pa2 += col + 3*pSrc->numCols;
  123. pa3 += col + 3*pSrc->numCols;
  124. pv += 4;
  125. pdst = pTmpB;
  126. k += 4;
  127. blkCnt--;
  128. }
  129. pa = pa0;
  130. for(;k<pSrc->numRows-col; k++)
  131. {
  132. for(j=0;j<pSrc->numCols-col; j++)
  133. {
  134. *pdst++ += *pv * *pa++;
  135. }
  136. pa += col;
  137. pv++;
  138. pdst = pTmpB;
  139. }
  140. /* A(col:,col:) - beta v tmpb */
  141. pa = p;
  142. for(j=0;j<pSrc->numRows-col; j++)
  143. {
  144. float64_t f = beta * pTmpA[j];
  145. for(i=0;i<pSrc->numCols-col; i++)
  146. {
  147. *pa = *pa - f * pTmpB[i] ;
  148. pa++;
  149. }
  150. pa += col;
  151. }
  152. /* Copy Householder reflectors into R matrix */
  153. pa = p + pOutR->numCols;
  154. for(k=0;k<pSrc->numRows-col-1; k++)
  155. {
  156. *pa = pTmpA[k+1];
  157. pa += pOutR->numCols;
  158. }
  159. p += 1 + pOutR->numCols;
  160. }
  161. /* Generate Q if requested by user matrix */
  162. if (pOutQ != NULL)
  163. {
  164. /* Initialize Q matrix to identity */
  165. memset(pOutQ->pData,0,sizeof(float64_t)*pOutQ->numRows*pOutQ->numRows);
  166. pa = pOutQ->pData;
  167. for(col=0 ; col < pOutQ->numCols; col++)
  168. {
  169. *pa = 1.0;
  170. pa += pOutQ->numCols+1;
  171. }
  172. nb = pOutQ->numRows - pOutQ->numCols + 1;
  173. pc = pOutTau + pOutQ->numCols - 1;
  174. for(col=0 ; col < pOutQ->numCols; col++)
  175. {
  176. int32_t i,j,k, blkCnt;
  177. float64_t *pa0,*pa1,*pa2,*pa3;
  178. pos = pSrc->numRows - nb;
  179. p = pOutQ->pData + pos + pOutQ->numCols*pos ;
  180. COPY_COL_F64(pOutR,pos,pos,pTmpA);
  181. pTmpA[0] = 1.0;
  182. pdst = pTmpB;
  183. /* v.T A(col:,col:) -> tmpb */
  184. pv = pTmpA;
  185. pa = p;
  186. for(j=0;j<pOutQ->numRows-pos; j++)
  187. {
  188. *pdst++ = *pv * *pa++;
  189. }
  190. pa += pos;
  191. pv++;
  192. pdst = pTmpB;
  193. pa0 = pa;
  194. pa1 = pa0 + pOutQ->numRows;
  195. pa2 = pa1 + pOutQ->numRows;
  196. pa3 = pa2 + pOutQ->numRows;
  197. /* Unrolled loop */
  198. blkCnt = (pOutQ->numRows-pos - 1) >> 2;
  199. k=1;
  200. while(blkCnt > 0)
  201. {
  202. float64_t sum;
  203. for(j=0;j<pOutQ->numRows-pos; j++)
  204. {
  205. sum = *pdst;
  206. sum += pv[0] * *pa0++;
  207. sum += pv[1] * *pa1++;
  208. sum += pv[2] * *pa2++;
  209. sum += pv[3] * *pa3++;
  210. *pdst++ = sum;
  211. }
  212. pa0 += pos + 3*pOutQ->numRows;
  213. pa1 += pos + 3*pOutQ->numRows;
  214. pa2 += pos + 3*pOutQ->numRows;
  215. pa3 += pos + 3*pOutQ->numRows;
  216. pv += 4;
  217. pdst = pTmpB;
  218. k += 4;
  219. blkCnt--;
  220. }
  221. pa = pa0;
  222. for(;k<pOutQ->numRows-pos; k++)
  223. {
  224. for(j=0;j<pOutQ->numRows-pos; j++)
  225. {
  226. *pdst++ += *pv * *pa++;
  227. }
  228. pa += pos;
  229. pv++;
  230. pdst = pTmpB;
  231. }
  232. pa = p;
  233. beta = *pc--;
  234. for(j=0;j<pOutQ->numRows-pos; j++)
  235. {
  236. float64_t f = beta * pTmpA[j];
  237. for(i=0;i<pOutQ->numCols-pos; i++)
  238. {
  239. *pa = *pa - f * pTmpB[i] ;
  240. pa++;
  241. }
  242. pa += pos;
  243. }
  244. nb++;
  245. }
  246. }
  247. arm_status status = ARM_MATH_SUCCESS;
  248. /* Return to application */
  249. return (status);
  250. }
  251. /**
  252. @} end of MatrixQR group
  253. */