test_fp.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. /*
  2. * SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. #include "sdkconfig.h"
  7. #if CONFIG_IDF_TARGET_ESP32
  8. #include <math.h>
  9. #include <stdio.h>
  10. #include "freertos/FreeRTOS.h"
  11. #include "freertos/task.h"
  12. #include "freertos/semphr.h"
  13. #include "unity.h"
  14. #include "test_utils.h"
/* Note: the functions below are included only for unit test purposes. They are not needed when
 * writing normal code: standard C floating point code gets its implementations from libgcc,
 * which should already use the floating point registers correctly. */
  18. static float addsf(float a, float b)
  19. {
  20. float result;
  21. asm volatile (
  22. "wfr f0, %1\n"
  23. "wfr f1, %2\n"
  24. "add.s f2, f0, f1\n"
  25. "rfr %0, f2\n"
  26. :"=r"(result):"r"(a), "r"(b)
  27. );
  28. return result;
  29. }
  30. static float mulsf(float a, float b)
  31. {
  32. float result;
  33. asm volatile (
  34. "wfr f0, %1\n"
  35. "wfr f1, %2\n"
  36. "mul.s f2, f0, f1\n"
  37. "rfr %0, f2\n"
  38. :"=r"(result):"r"(a), "r"(b)
  39. );
  40. return result;
  41. }
  42. static float divsf(float a, float b)
  43. {
  44. float result;
  45. asm volatile (
  46. "wfr f0, %1\n"
  47. "wfr f1, %2\n"
  48. "div0.s f3, f1 \n"
  49. "nexp01.s f4, f1 \n"
  50. "const.s f5, 1 \n"
  51. "maddn.s f5, f4, f3 \n"
  52. "mov.s f6, f3 \n"
  53. "mov.s f7, f1 \n"
  54. "nexp01.s f8, f0 \n"
  55. "maddn.s f6, f5, f3 \n"
  56. "const.s f5, 1 \n"
  57. "const.s f2, 0 \n"
  58. "neg.s f9, f8 \n"
  59. "maddn.s f5,f4,f6 \n"
  60. "maddn.s f2, f9, f3 \n"
  61. "mkdadj.s f7, f0 \n"
  62. "maddn.s f6,f5,f6 \n"
  63. "maddn.s f9,f4,f2 \n"
  64. "const.s f5, 1 \n"
  65. "maddn.s f5,f4,f6 \n"
  66. "maddn.s f2,f9,f6 \n"
  67. "neg.s f9, f8 \n"
  68. "maddn.s f6,f5,f6 \n"
  69. "maddn.s f9,f4,f2 \n"
  70. "addexpm.s f2, f7 \n"
  71. "addexp.s f6, f7 \n"
  72. "divn.s f2,f9,f6\n"
  73. "rfr %0, f2\n"
  74. :"=r"(result):"r"(a), "r"(b)
  75. );
  76. return result;
  77. }
  78. static float sqrtsf(float a)
  79. {
  80. float result;
  81. asm volatile (
  82. "wfr f0, %1\n"
  83. "sqrt0.s f2, f0\n"
  84. "const.s f5, 0\n"
  85. "maddn.s f5, f2, f2\n"
  86. "nexp01.s f3, f0\n"
  87. "const.s f4, 3\n"
  88. "addexp.s f3, f4\n"
  89. "maddn.s f4, f5, f3\n"
  90. "nexp01.s f5, f0\n"
  91. "neg.s f6, f5\n"
  92. "maddn.s f2, f4, f2\n"
  93. "const.s f1, 0\n"
  94. "const.s f4, 0\n"
  95. "const.s f7, 0\n"
  96. "maddn.s f1, f6, f2\n"
  97. "maddn.s f4, f2, f3\n"
  98. "const.s f6, 3\n"
  99. "maddn.s f7, f6, f2\n"
  100. "maddn.s f5, f1, f1\n"
  101. "maddn.s f6, f4, f2\n"
  102. "neg.s f3, f7\n"
  103. "maddn.s f1, f5, f3\n"
  104. "maddn.s f7, f6, f7\n"
  105. "mksadj.s f2, f0\n"
  106. "nexp01.s f5, f0\n"
  107. "maddn.s f5, f1, f1\n"
  108. "neg.s f3, f7\n"
  109. "addexpm.s f1, f2\n"
  110. "addexp.s f3, f2\n"
  111. "divn.s f1, f5, f3\n"
  112. "rfr %0, f1\n"
  113. :"=r"(result):"r"(a)
  114. );
  115. return result;
  116. }
  117. TEST_CASE("test FP add", "[fp]")
  118. {
  119. float a = 100.0f;
  120. float b = 0.5f;
  121. float c = addsf(a, b);
  122. float eps = c - 100.5f;
  123. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  124. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  125. }
  126. TEST_CASE("test FP mul", "[fp]")
  127. {
  128. float a = 100.0f;
  129. float b = 0.05f;
  130. float c = mulsf(a, b);
  131. float eps = c - 5.0f;
  132. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  133. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  134. }
  135. TEST_CASE("test FP div", "[fp]")
  136. {
  137. float a = 100.0f;
  138. float b = 5.0f;
  139. float c = divsf(a, b);
  140. float eps = c - 20.0f;
  141. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  142. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  143. }
  144. TEST_CASE("test FP sqrt", "[fp]")
  145. {
  146. float a = 100.0f;
  147. float c = sqrtsf(a);
  148. float eps = c - 10.0f;
  149. printf("a=%g c=%g eps=%g\r\n", a, c, eps);
  150. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  151. }
/* State shared between the "context switch saves FP registers" test case and
 * the tskTestFP tasks it starts. */
struct TestFPState {
    /* Incremented by tskTestFP for every iteration whose result is out of tolerance.
     * NOTE(review): incremented from several tasks without synchronisation, so the
     * exact count may be off when failures occur; the test only checks for non-zero. */
    int fail;
    /* Given once by each tskTestFP task when its loop has finished. */
    SemaphoreHandle_t done;
};
/* Number of loop iterations each tskTestFP task performs. */
static const int testFpIter = 100000;
  157. static void tskTestFP(void *pvParameters)
  158. {
  159. struct TestFPState *state = (struct TestFPState *) pvParameters;
  160. for (int i = 0; i < testFpIter; ++i) {
  161. // calculate zero in a slightly obscure way
  162. float y = sqrtsf(addsf(1.0f, divsf(mulsf(sqrtsf(2), sqrtsf(2)), 2.0f)));
  163. y = mulsf(y, y);
  164. y = addsf(y, -2.0f);
  165. // check that result is not far from zero
  166. float eps = fabs(y);
  167. if (eps > 1e-6f) {
  168. state->fail++;
  169. printf("%s: i=%d y=%f eps=%f\r\n", __func__, i, y, eps);
  170. }
  171. }
  172. TEST_ASSERT(xSemaphoreGive(state->done));
  173. vTaskDelete(NULL);
  174. }
  175. TEST_CASE("context switch saves FP registers", "[fp]")
  176. {
  177. struct TestFPState state = {
  178. .done = xSemaphoreCreateCounting(4, 0)
  179. };
  180. TEST_ASSERT_NOT_NULL(state.done);
  181. const int prio = UNITY_FREERTOS_PRIORITY + 1;
  182. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk1", 2048, &state, prio, NULL, 0));
  183. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk2", 2048, &state, prio, NULL, 0));
  184. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk3", 2048, &state, prio, NULL, portNUM_PROCESSORS - 1));
  185. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk4", 2048, &state, prio, NULL, 0));
  186. for (int i = 0; i < 4; ++i) {
  187. TEST_ASSERT(xSemaphoreTake(state.done, pdMS_TO_TICKS(5000)));
  188. }
  189. vSemaphoreDelete(state.done);
  190. if (state.fail) {
  191. const int total = testFpIter * 4;
  192. printf("Failed: %d, total: %d\r\n", state.fail, total);
  193. }
  194. TEST_ASSERT(state.fail == 0);
  195. }
  196. /* Note: not static, to avoid optimisation of const result */
  197. float IRAM_ATTR test_fp_benchmark_fp_divide(int counts, unsigned *cycles)
  198. {
  199. float f = MAXFLOAT;
  200. uint32_t before, after;
  201. RSR(CCOUNT, before);
  202. for (int i = 0; i < counts; i++) {
  203. f /= 1.000432f;
  204. }
  205. RSR(CCOUNT, after);
  206. *cycles = (after - before) / counts;
  207. return f;
  208. }
  209. TEST_CASE("floating point division performance", "[fp]")
  210. {
  211. const unsigned COUNTS = 1000;
  212. unsigned cycles = 0;
  213. // initialize fpu
  214. volatile __attribute__((unused)) float dummy = sqrtf(rand());
  215. float f = test_fp_benchmark_fp_divide(COUNTS, &cycles);
  216. printf("%d divisions from %f = %f\n", COUNTS, MAXFLOAT, f);
  217. printf("Per division = %d cycles\n", cycles);
  218. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_DIV, "%d cycles", cycles);
  219. }
  220. /* Note: not static, to avoid optimisation of const result */
  221. float IRAM_ATTR test_fp_benchmark_fp_sqrt(int counts, unsigned *cycles)
  222. {
  223. float f = MAXFLOAT;
  224. uint32_t before, after;
  225. RSR(CCOUNT, before);
  226. for (int i = 0; i < counts; i++) {
  227. f = sqrtf(f);
  228. }
  229. RSR(CCOUNT, after);
  230. *cycles = (after - before) / counts;
  231. return f;
  232. }
  233. TEST_CASE("floating point square root performance", "[fp]")
  234. {
  235. const unsigned COUNTS = 200;
  236. unsigned cycles = 0;
  237. // initialize fpu
  238. volatile float __attribute__((unused)) dummy = sqrtf(rand());
  239. float f = test_fp_benchmark_fp_sqrt(COUNTS, &cycles);
  240. printf("%d square roots from %f = %f\n", COUNTS, MAXFLOAT, f);
  241. printf("Per sqrt = %d cycles\n", cycles);
  242. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_SQRT, "%d cycles", cycles);
  243. }
  244. #endif // CONFIG_IDF_TARGET_ESP32