// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif
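
// Illustrative sketch (not part of the original header): optimized code is
// expected to gate its asm statements on this token and fall back to a
// portable path otherwise, e.g.
//
//   #ifdef GEMMLOWP_ALLOW_INLINE_ASM
//     asm volatile("" ::: "memory");  // GCC/Clang-syntax inline asm
//   #else
//     // portable C++ fallback
//   #endif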

// Define a macro that prevents inlining, for GCC.
// For non-GCC compilers, define it as an empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif
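
// Illustrative usage (not part of the original header): the attribute form
// used above is placed before the function signature, e.g.
//
//   GEMMLOWP_NOINLINE void SomeColdFunction();  // hypothetical declaration
//
// On non-GCC compilers the macro expands to nothing and the declaration
// compiles unchanged.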

// Detect ARM, 32-bit or 64-bit.
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit.
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit.
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif
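
// Illustrative check (not part of the original header): what was detected on
// a given toolchain can be confirmed with a small program such as
//
//   #include <cstdio>
//   #include "detect_platform.h"
//   int main() {
//   #if defined(GEMMLOWP_X86_64)
//     std::printf("x86, 64-bit\n");
//   #elif defined(GEMMLOWP_ARM_64)
//     std::printf("ARM, 64-bit\n");
//   #else
//     std::printf("other architecture or 32-bit\n");
//   #endif
//   }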

// Some of our optimized paths use inline assembly, and for now we don't
// bother enabling the other intrinsics-based optimized paths on toolchains
// where we can't use inline assembly.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens:
// __ARM_NEON is the ACLE spelling, while some older compilers only
// define __ARM_NEON__.
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif
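
// Illustrative sketch (not part of the original header): kernel code can key
// off the combined tokens so 32-bit and 64-bit NEON asm stay separate, e.g.
//
//   #if defined(GEMMLOWP_NEON_64)
//     // AArch64 NEON kernel
//   #elif defined(GEMMLOWP_NEON_32)
//     // ARMv7 NEON kernel
//   #endif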

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif
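
// Illustrative note (not part of the original header): with GCC/Clang, the
// __mips_msa and __mips_isa_rev macros checked above are typically set by
// flags such as
//
//   -mips32r5 -mmsa      (32-bit, release 5 ISA with MSA)
//   -mips64r6 -mmsa      (64-bit, release 6 ISA with MSA)
//
// Exact spellings vary by toolchain.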

// Detect AVX2. The AVX2 path must additionally be opted into with the
// compiler define -D GEMMLOWP_ENABLE_AVX2.
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not define a __SSE4_1__ macro, but enables SSE4
// code generation when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif
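
// Illustrative note (not part of the original header): typical compiler
// invocations that make these branches fire are, e.g.
//
//   g++/clang++:  -msse3, -msse4.1, or -mavx2 -DGEMMLOWP_ENABLE_AVX2
//   MSVC:         /arch:AVX (implies the SSE4 branch here), or
//                 /arch:AVX2 /DGEMMLOWP_ENABLE_AVX2 for the AVX2 path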

// Convenience SSE4 tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

// Detect sanitizers. Under MemorySanitizer or AddressSanitizer, define a
// macro that marks a memory region as initialized/addressable, presumably
// because the sanitizers cannot see writes performed by inline assembly.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif
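
// Illustrative usage sketch (not part of the original header): both sanitizer
// hooks take a (pointer, byte-count) pair, so a caller that fills a buffer
// via inline asm might do
//
//   #ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED
//     GEMMLOWP_MARK_MEMORY_AS_INITIALIZED(dst_ptr, dst_size_in_bytes);
//   #endif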

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_