test_fp.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /*
  2. * SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. #include "sdkconfig.h"
  7. #if CONFIG_IDF_TARGET_ESP32
  8. #include <math.h>
  9. #include <stdio.h>
  10. #include "soc/cpu.h"
  11. #include "freertos/FreeRTOS.h"
  12. #include "freertos/task.h"
  13. #include "freertos/semphr.h"
  14. #include "unity.h"
  15. #include "test_utils.h"
  /* Note: these functions are included here for unit test purposes only. They are not needed when
   * writing normal code: for standard C floating point code, libgcc should already provide
   * implementations that use the floating point registers correctly. */
  19. static float addsf(float a, float b)
  20. {
  21. float result;
  22. asm volatile (
  23. "wfr f0, %1\n"
  24. "wfr f1, %2\n"
  25. "add.s f2, f0, f1\n"
  26. "rfr %0, f2\n"
  27. :"=r"(result):"r"(a), "r"(b)
  28. );
  29. return result;
  30. }
  31. static float mulsf(float a, float b)
  32. {
  33. float result;
  34. asm volatile (
  35. "wfr f0, %1\n"
  36. "wfr f1, %2\n"
  37. "mul.s f2, f0, f1\n"
  38. "rfr %0, f2\n"
  39. :"=r"(result):"r"(a), "r"(b)
  40. );
  41. return result;
  42. }
  43. static float divsf(float a, float b)
  44. {
  45. float result;
  46. asm volatile (
  47. "wfr f0, %1\n"
  48. "wfr f1, %2\n"
  49. "div0.s f3, f1 \n"
  50. "nexp01.s f4, f1 \n"
  51. "const.s f5, 1 \n"
  52. "maddn.s f5, f4, f3 \n"
  53. "mov.s f6, f3 \n"
  54. "mov.s f7, f1 \n"
  55. "nexp01.s f8, f0 \n"
  56. "maddn.s f6, f5, f3 \n"
  57. "const.s f5, 1 \n"
  58. "const.s f2, 0 \n"
  59. "neg.s f9, f8 \n"
  60. "maddn.s f5,f4,f6 \n"
  61. "maddn.s f2, f9, f3 \n"
  62. "mkdadj.s f7, f0 \n"
  63. "maddn.s f6,f5,f6 \n"
  64. "maddn.s f9,f4,f2 \n"
  65. "const.s f5, 1 \n"
  66. "maddn.s f5,f4,f6 \n"
  67. "maddn.s f2,f9,f6 \n"
  68. "neg.s f9, f8 \n"
  69. "maddn.s f6,f5,f6 \n"
  70. "maddn.s f9,f4,f2 \n"
  71. "addexpm.s f2, f7 \n"
  72. "addexp.s f6, f7 \n"
  73. "divn.s f2,f9,f6\n"
  74. "rfr %0, f2\n"
  75. :"=r"(result):"r"(a), "r"(b)
  76. );
  77. return result;
  78. }
  79. static float sqrtsf(float a)
  80. {
  81. float result;
  82. asm volatile (
  83. "wfr f0, %1\n"
  84. "sqrt0.s f2, f0\n"
  85. "const.s f5, 0\n"
  86. "maddn.s f5, f2, f2\n"
  87. "nexp01.s f3, f0\n"
  88. "const.s f4, 3\n"
  89. "addexp.s f3, f4\n"
  90. "maddn.s f4, f5, f3\n"
  91. "nexp01.s f5, f0\n"
  92. "neg.s f6, f5\n"
  93. "maddn.s f2, f4, f2\n"
  94. "const.s f1, 0\n"
  95. "const.s f4, 0\n"
  96. "const.s f7, 0\n"
  97. "maddn.s f1, f6, f2\n"
  98. "maddn.s f4, f2, f3\n"
  99. "const.s f6, 3\n"
  100. "maddn.s f7, f6, f2\n"
  101. "maddn.s f5, f1, f1\n"
  102. "maddn.s f6, f4, f2\n"
  103. "neg.s f3, f7\n"
  104. "maddn.s f1, f5, f3\n"
  105. "maddn.s f7, f6, f7\n"
  106. "mksadj.s f2, f0\n"
  107. "nexp01.s f5, f0\n"
  108. "maddn.s f5, f1, f1\n"
  109. "neg.s f3, f7\n"
  110. "addexpm.s f1, f2\n"
  111. "addexp.s f3, f2\n"
  112. "divn.s f1, f5, f3\n"
  113. "rfr %0, f1\n"
  114. :"=r"(result):"r"(a)
  115. );
  116. return result;
  117. }
  118. TEST_CASE("test FP add", "[fp]")
  119. {
  120. float a = 100.0f;
  121. float b = 0.5f;
  122. float c = addsf(a, b);
  123. float eps = c - 100.5f;
  124. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  125. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  126. }
  127. TEST_CASE("test FP mul", "[fp]")
  128. {
  129. float a = 100.0f;
  130. float b = 0.05f;
  131. float c = mulsf(a, b);
  132. float eps = c - 5.0f;
  133. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  134. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  135. }
  136. TEST_CASE("test FP div", "[fp]")
  137. {
  138. float a = 100.0f;
  139. float b = 5.0f;
  140. float c = divsf(a, b);
  141. float eps = c - 20.0f;
  142. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  143. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  144. }
  145. TEST_CASE("test FP sqrt", "[fp]")
  146. {
  147. float a = 100.0f;
  148. float c = sqrtsf(a);
  149. float eps = c - 10.0f;
  150. printf("a=%g c=%g eps=%g\r\n", a, c, eps);
  151. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  152. }
  /* State shared between the "context switch saves FP registers" test case
   * and the tskTestFP worker tasks it spawns. */
  struct TestFPState {
      /* Number of worker iterations whose result was off by more than the
       * tolerance; incremented by tskTestFP, checked by the test case. */
      int fail;
      /* Semaphore each worker gives once when its loop has finished; the test
       * case takes it once per worker before inspecting `fail`. */
      SemaphoreHandle_t done;
  };
  157. static const int testFpIter = 100000;
  158. static void tskTestFP(void *pvParameters)
  159. {
  160. struct TestFPState *state = (struct TestFPState *) pvParameters;
  161. for (int i = 0; i < testFpIter; ++i) {
  162. // calculate zero in a slightly obscure way
  163. float y = sqrtsf(addsf(1.0f, divsf(mulsf(sqrtsf(2), sqrtsf(2)), 2.0f)));
  164. y = mulsf(y, y);
  165. y = addsf(y, -2.0f);
  166. // check that result is not far from zero
  167. float eps = fabs(y);
  168. if (eps > 1e-6f) {
  169. state->fail++;
  170. printf("%s: i=%d y=%f eps=%f\r\n", __func__, i, y, eps);
  171. }
  172. }
  173. TEST_ASSERT(xSemaphoreGive(state->done));
  174. vTaskDelete(NULL);
  175. }
  176. TEST_CASE("context switch saves FP registers", "[fp]")
  177. {
  178. struct TestFPState state = {
  179. .done = xSemaphoreCreateCounting(4, 0)
  180. };
  181. TEST_ASSERT_NOT_NULL(state.done);
  182. const int prio = UNITY_FREERTOS_PRIORITY + 1;
  183. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk1", 2048, &state, prio, NULL, 0));
  184. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk2", 2048, &state, prio, NULL, 0));
  185. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk3", 2048, &state, prio, NULL, portNUM_PROCESSORS - 1));
  186. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk4", 2048, &state, prio, NULL, 0));
  187. for (int i = 0; i < 4; ++i) {
  188. TEST_ASSERT(xSemaphoreTake(state.done, pdMS_TO_TICKS(5000)));
  189. }
  190. vSemaphoreDelete(state.done);
  191. if (state.fail) {
  192. const int total = testFpIter * 4;
  193. printf("Failed: %d, total: %d\r\n", state.fail, total);
  194. }
  195. TEST_ASSERT(state.fail == 0);
  196. }
  197. /* Note: not static, to avoid optimisation of const result */
  198. float IRAM_ATTR test_fp_benchmark_fp_divide(int counts, unsigned *cycles)
  199. {
  200. float f = MAXFLOAT;
  201. uint32_t before, after;
  202. RSR(CCOUNT, before);
  203. for (int i = 0; i < counts; i++) {
  204. f /= 1.000432f;
  205. }
  206. RSR(CCOUNT, after);
  207. *cycles = (after - before) / counts;
  208. return f;
  209. }
  210. TEST_CASE("floating point division performance", "[fp]")
  211. {
  212. const unsigned COUNTS = 1000;
  213. unsigned cycles = 0;
  214. // initialize fpu
  215. volatile __attribute__((unused)) float dummy = sqrtf(rand());
  216. float f = test_fp_benchmark_fp_divide(COUNTS, &cycles);
  217. printf("%d divisions from %f = %f\n", COUNTS, MAXFLOAT, f);
  218. printf("Per division = %d cycles\n", cycles);
  219. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_DIV, "%d cycles", cycles);
  220. }
  221. /* Note: not static, to avoid optimisation of const result */
  222. float IRAM_ATTR test_fp_benchmark_fp_sqrt(int counts, unsigned *cycles)
  223. {
  224. float f = MAXFLOAT;
  225. uint32_t before, after;
  226. RSR(CCOUNT, before);
  227. for (int i = 0; i < counts; i++) {
  228. f = sqrtf(f);
  229. }
  230. RSR(CCOUNT, after);
  231. *cycles = (after - before) / counts;
  232. return f;
  233. }
  234. TEST_CASE("floating point square root performance", "[fp]")
  235. {
  236. const unsigned COUNTS = 200;
  237. unsigned cycles = 0;
  238. // initialize fpu
  239. volatile float __attribute__((unused)) dummy = sqrtf(rand());
  240. float f = test_fp_benchmark_fp_sqrt(COUNTS, &cycles);
  241. printf("%d square roots from %f = %f\n", COUNTS, MAXFLOAT, f);
  242. printf("Per sqrt = %d cycles\n", cycles);
  243. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_SQRT, "%d cycles", cycles);
  244. }
  245. #endif // CONFIG_IDF_TARGET_ESP32