cpu.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef NCNN_CPU_H
  15. #define NCNN_CPU_H
  16. #include <stddef.h>
  17. #if (defined _WIN32 && !(defined __MINGW32__))
  18. #define WIN32_LEAN_AND_MEAN
  19. #include <windows.h>
  20. #endif
  21. #if defined __ANDROID__ || defined __linux__
  22. #include <sched.h> // cpu_set_t
  23. #endif
  24. #include "platform.h"
  25. namespace ncnn {
  26. class NCNN_EXPORT CpuSet
  27. {
  28. public:
  29. CpuSet();
  30. void enable(int cpu);
  31. void disable(int cpu);
  32. void disable_all();
  33. bool is_enabled(int cpu) const;
  34. int num_enabled() const;
  35. public:
  36. #if (defined _WIN32 && !(defined __MINGW32__))
  37. ULONG_PTR mask;
  38. #endif
  39. #if defined __ANDROID__ || defined __linux__
  40. cpu_set_t cpu_set;
  41. #endif
  42. #if __APPLE__
  43. unsigned int policy;
  44. #endif
  45. };
  46. // test optional cpu features
  47. // edsp = armv7 edsp
  48. NCNN_EXPORT int cpu_support_arm_edsp();
  49. // neon = armv7 neon or aarch64 asimd
  50. NCNN_EXPORT int cpu_support_arm_neon();
  51. // vfpv4 = armv7 fp16 + fma
  52. NCNN_EXPORT int cpu_support_arm_vfpv4();
  53. // asimdhp = aarch64 asimd half precision
  54. NCNN_EXPORT int cpu_support_arm_asimdhp();
  55. // asimddp = aarch64 asimd dot product
  56. NCNN_EXPORT int cpu_support_arm_asimddp();
  57. // asimdfhm = aarch64 asimd fhm
  58. NCNN_EXPORT int cpu_support_arm_asimdfhm();
  59. // bf16 = aarch64 bf16
  60. NCNN_EXPORT int cpu_support_arm_bf16();
  61. // i8mm = aarch64 i8mm
  62. NCNN_EXPORT int cpu_support_arm_i8mm();
  63. // sve = aarch64 sve
  64. NCNN_EXPORT int cpu_support_arm_sve();
  65. // sve2 = aarch64 sve2
  66. NCNN_EXPORT int cpu_support_arm_sve2();
  67. // svebf16 = aarch64 svebf16
  68. NCNN_EXPORT int cpu_support_arm_svebf16();
  69. // svei8mm = aarch64 svei8mm
  70. NCNN_EXPORT int cpu_support_arm_svei8mm();
  71. // svef32mm = aarch64 svef32mm
  72. NCNN_EXPORT int cpu_support_arm_svef32mm();
  73. // avx = x86 avx
  74. NCNN_EXPORT int cpu_support_x86_avx();
  75. // fma = x86 fma
  76. NCNN_EXPORT int cpu_support_x86_fma();
  77. // xop = x86 xop
  78. NCNN_EXPORT int cpu_support_x86_xop();
  79. // f16c = x86 f16c
  80. NCNN_EXPORT int cpu_support_x86_f16c();
  81. // avx2 = x86 avx2 + fma + f16c
  82. NCNN_EXPORT int cpu_support_x86_avx2();
  83. // avx_vnni = x86 avx vnni
  84. NCNN_EXPORT int cpu_support_x86_avx_vnni();
  85. // avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
  86. NCNN_EXPORT int cpu_support_x86_avx512();
  87. // avx512_vnni = x86 avx512 vnni
  88. NCNN_EXPORT int cpu_support_x86_avx512_vnni();
  89. // avx512_bf16 = x86 avx512 bf16
  90. NCNN_EXPORT int cpu_support_x86_avx512_bf16();
  91. // avx512_fp16 = x86 avx512 fp16
  92. NCNN_EXPORT int cpu_support_x86_avx512_fp16();
  93. // lsx = loongarch lsx
  94. NCNN_EXPORT int cpu_support_loongarch_lsx();
  95. // lasx = loongarch lasx
  96. NCNN_EXPORT int cpu_support_loongarch_lasx();
  97. // msa = mips mas
  98. NCNN_EXPORT int cpu_support_mips_msa();
  99. // mmi = loongson mmi
  100. NCNN_EXPORT int cpu_support_loongson_mmi();
  101. // v = riscv vector
  102. NCNN_EXPORT int cpu_support_riscv_v();
  103. // zfh = riscv half-precision float
  104. NCNN_EXPORT int cpu_support_riscv_zfh();
  105. // vlenb = riscv vector length in bytes
  106. NCNN_EXPORT int cpu_riscv_vlenb();
  107. // cpu info
  108. NCNN_EXPORT int get_cpu_count();
  109. NCNN_EXPORT int get_little_cpu_count();
  110. NCNN_EXPORT int get_big_cpu_count();
  111. NCNN_EXPORT int get_physical_cpu_count();
  112. NCNN_EXPORT int get_physical_little_cpu_count();
  113. NCNN_EXPORT int get_physical_big_cpu_count();
  114. // cpu l2 varies from 64k to 1M, but l3 can be zero
  115. NCNN_EXPORT int get_cpu_level2_cache_size();
  116. NCNN_EXPORT int get_cpu_level3_cache_size();
  117. // bind all threads on little clusters if powersave enabled
  118. // affects HMP arch cpu like ARM big.LITTLE
  119. // only implemented on android at the moment
  120. // switching powersave is expensive and not thread-safe
  121. // 0 = all cores enabled(default)
  122. // 1 = only little clusters enabled
  123. // 2 = only big clusters enabled
  124. // return 0 if success for setter function
  125. NCNN_EXPORT int get_cpu_powersave();
  126. NCNN_EXPORT int set_cpu_powersave(int powersave);
  127. // convenient wrapper
  128. NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);
  129. // set explicit thread affinity
  130. NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);
  131. // misc function wrapper for openmp routines
  132. NCNN_EXPORT int get_omp_num_threads();
  133. NCNN_EXPORT void set_omp_num_threads(int num_threads);
  134. NCNN_EXPORT int get_omp_dynamic();
  135. NCNN_EXPORT void set_omp_dynamic(int dynamic);
  136. NCNN_EXPORT int get_omp_thread_num();
  137. NCNN_EXPORT int get_kmp_blocktime();
  138. NCNN_EXPORT void set_kmp_blocktime(int time_ms);
  139. // need to flush denormals on Intel Chipset.
  140. // Other architectures such as ARM can be added as needed.
  141. // 0 = DAZ OFF, FTZ OFF
  142. // 1 = DAZ ON , FTZ OFF
  143. // 2 = DAZ OFF, FTZ ON
  144. // 3 = DAZ ON, FTZ ON
  145. NCNN_EXPORT int get_flush_denormals();
  146. NCNN_EXPORT int set_flush_denormals(int flush_denormals);
  147. } // namespace ncnn
  148. #endif // NCNN_CPU_H