arm_logsumexp_f16.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_logsumexp_f16.c
  4. * Description: LogSumExp
  5. *
  6. *
  7. * Target Processor: Cortex-M and Cortex-A cores
  8. * -------------------------------------------------------------------- */
  9. /*
  10. * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
  11. *
  12. * SPDX-License-Identifier: Apache-2.0
  13. *
  14. * Licensed under the Apache License, Version 2.0 (the License); you may
  15. * not use this file except in compliance with the License.
  16. * You may obtain a copy of the License at
  17. *
  18. * www.apache.org/licenses/LICENSE-2.0
  19. *
  20. * Unless required by applicable law or agreed to in writing, software
  21. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  22. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23. * See the License for the specific language governing permissions and
  24. * limitations under the License.
  25. */
  26. #include "dsp/statistics_functions_f16.h"
  27. #if defined(ARM_FLOAT16_SUPPORTED)
  28. #include <limits.h>
  29. #include <math.h>
  30. /**
  31. * @addtogroup LogSumExp
  32. * @{
  33. */
/**
 * @brief Computation of the LogSumExp
 *
 * In probabilistic computations, the dynamic range of the probability values can be very
 * wide because they come from Gaussian functions.
 * To avoid underflow and overflow issues, the values are represented by their log.
 * In this representation, multiplying the original exp values is easy: their logs are added.
 * But adding the original exp values requires some special handling, and that is the
 * goal of the LogSumExp function.
 *
 * If the values are x1...xn, the function computes:
 *
 * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
 * rounding issues are minimised.
 *
 * The max xm of the values is extracted and the function computes:
 * xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
 *
 * @param[in]  in         Pointer to an array of input values.
 * @param[in]  blockSize  Number of samples in the input array.
 * @return     LogSumExp
 *
 */
  57. #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
  58. #include "arm_helium_utils.h"
  59. #include "arm_vec_math_f16.h"
  60. float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
  61. {
  62. float16_t maxVal;
  63. const float16_t *pIn;
  64. int32_t blkCnt;
  65. _Float16 accum=0.0f16;
  66. _Float16 tmp;
  67. arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);
  68. blkCnt = blockSize;
  69. pIn = in;
  70. f16x8_t vSum = vdupq_n_f16(0.0f16);
  71. blkCnt = blockSize >> 3;
  72. while(blkCnt > 0)
  73. {
  74. f16x8_t vecIn = vld1q(pIn);
  75. f16x8_t vecExp;
  76. vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal));
  77. vSum = vaddq_f16(vSum, vecExp);
  78. /*
  79. * Decrement the blockSize loop counter
  80. * Advance vector source and destination pointers
  81. */
  82. pIn += 8;
  83. blkCnt --;
  84. }
  85. /* sum + log */
  86. accum = vecAddAcrossF16Mve(vSum);
  87. blkCnt = blockSize & 0x7;
  88. while(blkCnt > 0)
  89. {
  90. tmp = *pIn++;
  91. accum += expf(tmp - maxVal);
  92. blkCnt--;
  93. }
  94. accum = maxVal + logf(accum);
  95. return (accum);
  96. }
  97. #else
  98. float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
  99. {
  100. _Float16 maxVal;
  101. _Float16 tmp;
  102. const float16_t *pIn;
  103. uint32_t blkCnt;
  104. _Float16 accum;
  105. pIn = in;
  106. blkCnt = blockSize;
  107. maxVal = *pIn++;
  108. blkCnt--;
  109. while(blkCnt > 0)
  110. {
  111. tmp = *pIn++;
  112. if (tmp > maxVal)
  113. {
  114. maxVal = tmp;
  115. }
  116. blkCnt--;
  117. }
  118. blkCnt = blockSize;
  119. pIn = in;
  120. accum = 0;
  121. while(blkCnt > 0)
  122. {
  123. tmp = *pIn++;
  124. accum += expf(tmp - maxVal);
  125. blkCnt--;
  126. }
  127. accum = maxVal + logf(accum);
  128. return(accum);
  129. }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
  131. /**
  132. * @} end of LogSumExp group
  133. */
  134. #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */