test_fp.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. #include <math.h>
  2. #include <stdio.h>
  3. #include "soc/cpu.h"
  4. #include "freertos/FreeRTOS.h"
  5. #include "freertos/task.h"
  6. #include "freertos/semphr.h"
  7. #include "unity.h"
  8. #include "test_utils.h"
  /* Note: these functions are included here for unit test purposes only. They are not needed for
   * writing normal code. When writing standard C floating point code, libgcc should already include
   * implementations that use the floating point registers correctly. */
  12. static float addsf(float a, float b)
  13. {
  14. float result;
  15. asm volatile (
  16. "wfr f0, %1\n"
  17. "wfr f1, %2\n"
  18. "add.s f2, f0, f1\n"
  19. "rfr %0, f2\n"
  20. :"=r"(result):"r"(a), "r"(b)
  21. );
  22. return result;
  23. }
  24. static float mulsf(float a, float b)
  25. {
  26. float result;
  27. asm volatile (
  28. "wfr f0, %1\n"
  29. "wfr f1, %2\n"
  30. "mul.s f2, f0, f1\n"
  31. "rfr %0, f2\n"
  32. :"=r"(result):"r"(a), "r"(b)
  33. );
  34. return result;
  35. }
  36. static float divsf(float a, float b)
  37. {
  38. float result;
  39. asm volatile (
  40. "wfr f0, %1\n"
  41. "wfr f1, %2\n"
  42. "div0.s f3, f1 \n"
  43. "nexp01.s f4, f1 \n"
  44. "const.s f5, 1 \n"
  45. "maddn.s f5, f4, f3 \n"
  46. "mov.s f6, f3 \n"
  47. "mov.s f7, f1 \n"
  48. "nexp01.s f8, f0 \n"
  49. "maddn.s f6, f5, f3 \n"
  50. "const.s f5, 1 \n"
  51. "const.s f2, 0 \n"
  52. "neg.s f9, f8 \n"
  53. "maddn.s f5,f4,f6 \n"
  54. "maddn.s f2, f9, f3 \n"
  55. "mkdadj.s f7, f0 \n"
  56. "maddn.s f6,f5,f6 \n"
  57. "maddn.s f9,f4,f2 \n"
  58. "const.s f5, 1 \n"
  59. "maddn.s f5,f4,f6 \n"
  60. "maddn.s f2,f9,f6 \n"
  61. "neg.s f9, f8 \n"
  62. "maddn.s f6,f5,f6 \n"
  63. "maddn.s f9,f4,f2 \n"
  64. "addexpm.s f2, f7 \n"
  65. "addexp.s f6, f7 \n"
  66. "divn.s f2,f9,f6\n"
  67. "rfr %0, f2\n"
  68. :"=r"(result):"r"(a), "r"(b)
  69. );
  70. return result;
  71. }
  72. static float sqrtsf(float a)
  73. {
  74. float result;
  75. asm volatile (
  76. "wfr f0, %1\n"
  77. "sqrt0.s f2, f0\n"
  78. "const.s f5, 0\n"
  79. "maddn.s f5, f2, f2\n"
  80. "nexp01.s f3, f0\n"
  81. "const.s f4, 3\n"
  82. "addexp.s f3, f4\n"
  83. "maddn.s f4, f5, f3\n"
  84. "nexp01.s f5, f0\n"
  85. "neg.s f6, f5\n"
  86. "maddn.s f2, f4, f2\n"
  87. "const.s f1, 0\n"
  88. "const.s f4, 0\n"
  89. "const.s f7, 0\n"
  90. "maddn.s f1, f6, f2\n"
  91. "maddn.s f4, f2, f3\n"
  92. "const.s f6, 3\n"
  93. "maddn.s f7, f6, f2\n"
  94. "maddn.s f5, f1, f1\n"
  95. "maddn.s f6, f4, f2\n"
  96. "neg.s f3, f7\n"
  97. "maddn.s f1, f5, f3\n"
  98. "maddn.s f7, f6, f7\n"
  99. "mksadj.s f2, f0\n"
  100. "nexp01.s f5, f0\n"
  101. "maddn.s f5, f1, f1\n"
  102. "neg.s f3, f7\n"
  103. "addexpm.s f1, f2\n"
  104. "addexp.s f3, f2\n"
  105. "divn.s f1, f5, f3\n"
  106. "rfr %0, f1\n"
  107. :"=r"(result):"r"(a)
  108. );
  109. return result;
  110. }
  111. TEST_CASE("test FP add", "[fp]")
  112. {
  113. float a = 100.0f;
  114. float b = 0.5f;
  115. float c = addsf(a, b);
  116. float eps = c - 100.5f;
  117. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  118. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  119. }
  120. TEST_CASE("test FP mul", "[fp]")
  121. {
  122. float a = 100.0f;
  123. float b = 0.05f;
  124. float c = mulsf(a, b);
  125. float eps = c - 5.0f;
  126. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  127. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  128. }
  129. TEST_CASE("test FP div", "[fp]")
  130. {
  131. float a = 100.0f;
  132. float b = 5.0f;
  133. float c = divsf(a, b);
  134. float eps = c - 20.0f;
  135. printf("a=%g b=%g c=%g eps=%g\r\n", a, b, c, eps);
  136. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  137. }
  138. TEST_CASE("test FP sqrt", "[fp]")
  139. {
  140. float a = 100.0f;
  141. float c = sqrtsf(a);
  142. float eps = c - 10.0f;
  143. printf("a=%g c=%g eps=%g\r\n", a, c, eps);
  144. TEST_ASSERT_TRUE(fabs(eps) < 0.000001);
  145. }
  146. struct TestFPState {
  147. int fail;
  148. SemaphoreHandle_t done;
  149. };
  150. static const int testFpIter = 100000;
  151. static void tskTestFP(void *pvParameters)
  152. {
  153. struct TestFPState *state = (struct TestFPState *) pvParameters;
  154. for (int i = 0; i < testFpIter; ++i) {
  155. // calculate zero in a slightly obscure way
  156. float y = sqrtsf(addsf(1.0f, divsf(mulsf(sqrtsf(2), sqrtsf(2)), 2.0f)));
  157. y = mulsf(y, y);
  158. y = addsf(y, -2.0f);
  159. // check that result is not far from zero
  160. float eps = fabs(y);
  161. if (eps > 1e-6f) {
  162. state->fail++;
  163. printf("%s: i=%d y=%f eps=%f\r\n", __func__, i, y, eps);
  164. }
  165. }
  166. TEST_ASSERT(xSemaphoreGive(state->done));
  167. vTaskDelete(NULL);
  168. }
  169. TEST_CASE("context switch saves FP registers", "[fp]")
  170. {
  171. struct TestFPState state = {
  172. .done = xSemaphoreCreateCounting(4, 0)
  173. };
  174. TEST_ASSERT_NOT_NULL(state.done);
  175. const int prio = UNITY_FREERTOS_PRIORITY + 1;
  176. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk1", 2048, &state, prio, NULL, 0));
  177. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk2", 2048, &state, prio, NULL, 0));
  178. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk3", 2048, &state, prio, NULL, portNUM_PROCESSORS - 1));
  179. TEST_ASSERT(xTaskCreatePinnedToCore(tskTestFP, "tsk4", 2048, &state, prio, NULL, 0));
  180. for (int i = 0; i < 4; ++i) {
  181. TEST_ASSERT(xSemaphoreTake(state.done, pdMS_TO_TICKS(5000)));
  182. }
  183. vSemaphoreDelete(state.done);
  184. if (state.fail) {
  185. const int total = testFpIter * 4;
  186. printf("Failed: %d, total: %d\r\n", state.fail, total);
  187. }
  188. TEST_ASSERT(state.fail == 0);
  189. }
  190. /* Note: not static, to avoid optimisation of const result */
  191. float IRAM_ATTR test_fp_benchmark_fp_divide(int counts, unsigned *cycles)
  192. {
  193. float f = MAXFLOAT;
  194. uint32_t before, after;
  195. RSR(CCOUNT, before);
  196. for (int i = 0; i < counts; i++) {
  197. f /= 1.000432f;
  198. }
  199. RSR(CCOUNT, after);
  200. *cycles = (after - before) / counts;
  201. return f;
  202. }
  203. TEST_CASE("floating point division performance", "[fp]")
  204. {
  205. const unsigned COUNTS = 1000;
  206. unsigned cycles = 0;
  207. // initialize fpu
  208. volatile __attribute__((unused)) float dummy = sqrtf(rand());
  209. float f = test_fp_benchmark_fp_divide(COUNTS, &cycles);
  210. printf("%d divisions from %f = %f\n", COUNTS, MAXFLOAT, f);
  211. printf("Per division = %d cycles\n", cycles);
  212. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_DIV, "%d cycles", cycles);
  213. }
  214. /* Note: not static, to avoid optimisation of const result */
  215. float IRAM_ATTR test_fp_benchmark_fp_sqrt(int counts, unsigned *cycles)
  216. {
  217. float f = MAXFLOAT;
  218. uint32_t before, after;
  219. RSR(CCOUNT, before);
  220. for (int i = 0; i < counts; i++) {
  221. f = sqrtf(f);
  222. }
  223. RSR(CCOUNT, after);
  224. *cycles = (after - before) / counts;
  225. return f;
  226. }
  227. TEST_CASE("floating point square root performance", "[fp]")
  228. {
  229. const unsigned COUNTS = 200;
  230. unsigned cycles = 0;
  231. // initialize fpu
  232. volatile float __attribute__((unused)) dummy = sqrtf(rand());
  233. float f = test_fp_benchmark_fp_sqrt(COUNTS, &cycles);
  234. printf("%d square roots from %f = %f\n", COUNTS, MAXFLOAT, f);
  235. printf("Per sqrt = %d cycles\n", cycles);
  236. TEST_PERFORMANCE_LESS_THAN(CYCLES_PER_SQRT, "%d cycles", cycles);
  237. }