// Fusion test driver: exposes fusion_test() with C linkage and implements it with
// three templated benchmark/validation helpers plus a per-datatype dispatcher.
//
// NOTE(review): in this copy every angle-bracket span appears to have been stripped
// (include targets, template parameter lists, template arguments, and part of the
// index loops in test3) and the line breaks were collapsed, so preprocessor
// directives now sit mid-line. Restore from the pristine file before building;
// the descriptions below cover only what is still visible.
//
// test():
//   Prints a "----" / "N = NB" banner, creates vectors a, b and c (default-constructed
//   when STATIC_TEST is defined, otherwise constructed with NB) and fills each with
//   init_array(). Inside the INIT_SYSTICK / START_CYCLE_MEASUREMENT ...
//   STOP_CYCLE_MEASUREMENT window and profiling section 1 it evaluates
//   results(resa,resb) = Merged{a + b,a + c}. It then computes refa and refb with two
//   cmsisdsp_add() calls in a second measurement window, compares each result with
//   validate(), prints "add a failed" / "add b failed" on mismatch and ends with "=====".
//
// test2():
//   Same banner and vector setup. Uses Acc (number_traits ... ::accumulator) for the
//   scalar results. In profiling section 2 it assigns
//   std::tie(resa,resb) = dot(Merged{expr(a),expr(a)}, Merged{expr(b),expr(c)}),
//   then computes refa and refb with two cmsisdsp_dot() calls and reports
//   "dot a failed" / "dot b failed" when validate() returns false.
//
// test3():
//   Same banner. Builds two arrays of U = 2 vectors, a[U] and b[U]. In profiling
//   section 3 it assigns results(res) = dot(unroll(...), unroll(...)), where each lambda
//   returns expr(a[k]) or expr(b[k]) for index k. The initialisation loop, the
//   declaration of res, and the reference/validation loops are truncated in this copy.
//
// all_fusion_test():
//   Reads TailForTests ... ::tail and ::loop into nb_tails and nb_loops, then runs three
//   groups announced by title(): "Vector Fusion" (six test() calls),
//   "Dot Product Fusion" (six test2() calls) and "Unroll Fusion" (six test3() calls).
//   The template arguments of those calls are not visible here.
//
// fusion_test():
//   C-linkage entry point. Its body is compiled only when FUSION_TEST is defined; it
//   then calls all_fusion_test() once for each enabled datatype macro: F64_DT, F32_DT,
//   F16_DT (only when DISABLEFLOAT16 is not defined), Q31_DT, Q15_DT and Q7_DT.
extern "C" { extern void fusion_test(); } #include "allocator.h" #include #include #include #include #include #include #include template static void test() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; #if defined(STATIC_TEST) PVector a; PVector b; PVector c; #else PVector a(NB); PVector b(NB); PVector c(NB); #endif init_array(a,NB); init_array(b,NB); init_array(c,NB); #if defined(STATIC_TEST) PVector resa; PVector resb; #else PVector resa(NB); PVector resb(NB); #endif INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(1); results(resa,resb) = Merged{a + b,a + c}; stopSectionNB(1); STOP_CYCLE_MEASUREMENT; PVector refa; PVector refb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; cmsisdsp_add(a.const_ptr(),b.const_ptr(),refa.ptr(),NB); cmsisdsp_add(a.const_ptr(),c.const_ptr(),refb.ptr(),NB); STOP_CYCLE_MEASUREMENT; if (!validate(resa.const_ptr(),refa.const_ptr(),NB)) { printf("add a failed \r\n"); } if (!validate(resb.const_ptr(),refb.const_ptr(),NB)) { printf("add b failed \r\n"); } std::cout << "=====\r\n"; } template static void test2() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; #if defined(STATIC_TEST) PVector a; PVector b; PVector c; #else PVector a(NB); PVector b(NB); PVector c(NB); #endif using Acc = typename number_traits::accumulator; init_array(a,NB); init_array(b,NB); init_array(c,NB); Acc resa,resb,refa,refb; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(2); std::tie(resa,resb) = dot(Merged{expr(a),expr(a)}, Merged{expr(b),expr(c)}); stopSectionNB(2); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; cmsisdsp_dot(a.const_ptr(),b.const_ptr(),refa,NB); cmsisdsp_dot(a.const_ptr(),c.const_ptr(),refb,NB); STOP_CYCLE_MEASUREMENT; if (!validate(resa,refa)) { printf("dot a failed \r\n"); } if (!validate(resb,refb)) { printf("dot b failed \r\n"); } std::cout << "=====\r\n"; } template static void test3() { std::cout << "----\r\n" << "N = " << NB << "\r\n"; constexpr int U = 2; #if defined(STATIC_TEST) PVector a[U]; PVector b[U]; 
#else PVector a[U]={PVector(NB),PVector(NB)}; PVector b[U]={PVector(NB),PVector(NB)}; #endif using Acc = typename number_traits::accumulator; for(int i=0;i res; Acc ref[U]; INIT_SYSTICK; START_CYCLE_MEASUREMENT; startSectionNB(3); results(res) = dot(unroll( [&a](index_t k){return expr(a[k]);}), unroll( [&b](index_t k){return expr(b[k]);}) ); stopSectionNB(3); STOP_CYCLE_MEASUREMENT; INIT_SYSTICK; START_CYCLE_MEASUREMENT; for(int i=0;i void all_fusion_test() { const int nb_tails = TailForTests::tail; const int nb_loops = TailForTests::loop; title("Vector Fusion"); test(); test(); test(); test(); test(); test(); title("Dot Product Fusion"); test2(); test2(); test2(); test2(); test2(); test2(); title("Unroll Fusion"); test3(); test3(); test3(); test3(); test3(); test3(); } void fusion_test() { #if defined(FUSION_TEST) #if defined(F64_DT) all_fusion_test(); #endif #if defined(F32_DT) all_fusion_test(); #endif #if defined(F16_DT) && !defined(DISABLEFLOAT16) all_fusion_test(); #endif #if defined(Q31_DT) all_fusion_test(); #endif #if defined(Q15_DT) all_fusion_test(); #endif #if defined(Q7_DT) all_fusion_test(); #endif #endif }