numeric_impl.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // -*- C++ -*-
  2. //===-- numeric_impl.h ----------------------------------------------------===//
  3. //
  4. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  5. // See https://llvm.org/LICENSE.txt for license information.
  6. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #ifndef __PSTL_numeric_impl_H
  10. #define __PSTL_numeric_impl_H
  11. #include <iterator>
  12. #include <type_traits>
  13. #include <numeric>
  14. #include "execution_impl.h"
  15. #include "unseq_backend_simd.h"
  16. #include "algorithm_fwd.h"
  17. #if __PSTL_USE_PAR_POLICIES
  18. #include "parallel_backend.h"
  19. #endif
  20. namespace __pstl
  21. {
  22. namespace __internal
  23. {
  24. //------------------------------------------------------------------------
  25. // transform_reduce (version with two binary functions, according to draft N4659)
  26. //------------------------------------------------------------------------
  // Serial brick of the two-range transform_reduce.
  //
  // Combines the element pairs of [__first1, __last1) and the range starting at __first2 with
  // __binary_op2 and folds the results into __init with __binary_op1, by delegating to
  // std::inner_product. Selected through the std::false_type "is_vector" tag.
  template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
            class _BinaryOperation2>
  _Tp
  __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
                           _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
                           /*is_vector=*/std::false_type) noexcept
  {
      return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2);
  }
  35. template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
  36. _Tp
  37. __brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
  38. _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
  39. /*is_vector=*/std::true_type) noexcept
  40. {
  41. typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
  42. return __unseq_backend::__simd_transform_reduce(
  43. __last1 - __first1, __init, __binary_op1,
  44. [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); });
  45. }
  46. template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
  47. class _BinaryOperation2, class _IsVector>
  48. _Tp
  49. __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
  50. _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
  51. _BinaryOperation2 __binary_op2, _IsVector __is_vector,
  52. /*is_parallel=*/std::false_type) noexcept
  53. {
  54. return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector);
  55. }
  #if __PSTL_USE_PAR_POLICIES
  // Parallel pattern of the two-range transform_reduce (is_parallel == std::true_type).
  //
  // Runs __par_backend::__parallel_transform_reduce over [__first1, __last1) inside
  // __internal::__except_handler. The backend receives:
  //   - a per-element transform applying __binary_op2 to *it and the element of the second range
  //     at the same offset,
  //   - the initial value and __binary_op1 as the combiner,
  //   - a sub-range reducer that calls the (serial or vector) brick on [__begin, __end).
  template <class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp,
            class _BinaryOperation1, class _BinaryOperation2, class _IsVector>
  _Tp
  __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
                             _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init,
                             _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2, _IsVector __is_vector,
                             /*is_parallel=*/std::true_type)
  {
      return __internal::__except_handler([&]() {
          return __par_backend::__parallel_transform_reduce(
              std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
              // Transform one element pair.
              [__first1, __first2, __binary_op2](_RandomAccessIterator1 __it) mutable {
                  return __binary_op2(*__it, *(__first2 + (__it - __first1)));
              },
              __init,
              __binary_op1, // Combine
              // Reduce one sub-range, starting from the partial value supplied by the backend.
              [__first1, __first2, __binary_op1, __binary_op2, __is_vector](
                  _RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _Tp __partial) -> _Tp {
                  return __internal::__brick_transform_reduce(__begin, __end, __first2 + (__begin - __first1),
                                                              __partial, __binary_op1, __binary_op2, __is_vector);
              });
      });
  }
  #endif
  80. //------------------------------------------------------------------------
  81. // transform_reduce (version with unary and binary functions)
  82. //------------------------------------------------------------------------
  // Serial brick of the unary transform_reduce.
  //
  // Walks [__first, __last) in order and accumulates __binary_op(__init, __unary_op(*it)) into
  // __init, which is returned once the range is exhausted (unchanged for an empty range).
  template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
  _Tp
  __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init,
                           _BinaryOperation __binary_op, _UnaryOperation __unary_op,
                           /*is_vector=*/std::false_type) noexcept
  {
      while (__first != __last)
      {
          __init = __binary_op(__init, __unary_op(*__first));
          ++__first;
      }
      return __init;
  }
  94. template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
  95. _Tp
  96. __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
  97. _UnaryOperation __unary_op, /*is_vector=*/std::true_type) noexcept
  98. {
  99. typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType;
  100. return __unseq_backend::__simd_transform_reduce(
  101. __last - __first, __init, __binary_op,
  102. [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); });
  103. }
  104. template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation,
  105. class _IsVector>
  106. _Tp
  107. __pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
  108. _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector,
  109. /*is_parallel=*/std::false_type) noexcept
  110. {
  111. return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector);
  112. }
  #if __PSTL_USE_PAR_POLICIES
  // Parallel pattern of the unary transform_reduce (is_parallel == std::true_type).
  //
  // Runs __par_backend::__parallel_transform_reduce over [__first, __last) inside
  // __internal::__except_handler. The backend receives a per-element transform (__unary_op),
  // the initial value, __binary_op as the combiner, and a sub-range reducer that calls the
  // (serial or vector) brick on [__begin, __end).
  template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation,
            class _UnaryOperation, class _IsVector>
  _Tp
  __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
                             _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op,
                             _IsVector __is_vector,
                             /*is_parallel=*/std::true_type)
  {
      return __internal::__except_handler([&]() {
          return __par_backend::__parallel_transform_reduce(
              std::forward<_ExecutionPolicy>(__exec), __first, __last,
              [__unary_op](_ForwardIterator __it) mutable { return __unary_op(*__it); }, __init, __binary_op,
              [__unary_op, __binary_op, __is_vector](_ForwardIterator __begin, _ForwardIterator __end,
                                                     _Tp __partial) {
                  return __internal::__brick_transform_reduce(__begin, __end, __partial, __binary_op, __unary_op,
                                                              __is_vector);
              });
      });
  }
  #endif
  //------------------------------------------------------------------------
  // transform_exclusive_scan / transform_inclusive_scan
  //
  // Note: walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
  //------------------------------------------------------------------------
  136. // Exclusive form
  137. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
  138. std::pair<_OutputIterator, _Tp>
  139. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  140. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  141. /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept
  142. {
  143. for (; __first != __last; ++__first, ++__result)
  144. {
  145. *__result = __init;
  146. __PSTL_PRAGMA_FORCEINLINE
  147. __init = __binary_op(__init, __unary_op(*__first));
  148. }
  149. return std::make_pair(__result, __init);
  150. }
  151. // Inclusive form
  152. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
  153. std::pair<_OutputIterator, _Tp>
  154. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  155. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  156. /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept
  157. {
  158. for (; __first != __last; ++__first, ++__result)
  159. {
  160. __PSTL_PRAGMA_FORCEINLINE
  161. __init = __binary_op(__init, __unary_op(*__first));
  162. *__result = __init;
  163. }
  164. return std::make_pair(__result, __init);
  165. }
  // Compile-time predicate: true when _Tp is an arithmetic type and _BinaryOperation is anything
  // other than std::plus<_Tp>, i.e. an arithmetic type combined with a user-defined operation.
  // It is used below to select between the two vector overloads of __brick_transform_scan.
  template <typename _Tp, typename _BinaryOperation>
  using is_arithmetic_udop =
      std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
                                       !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
  170. // [restriction] - T shall be DefaultConstructible.
  171. // [violation] - default ctor of T shall set the identity value for binary_op.
  172. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
  173. class _Inclusive>
  174. typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
  175. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  176. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
  177. /*is_vector=*/std::true_type) noexcept
  178. {
  179. #if (__PSTL_UDS_PRESENT)
  180. return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op,
  181. _Inclusive());
  182. #else
  183. // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value
  184. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
  185. /*is_vector=*/std::false_type());
  186. #endif
  187. }
  188. template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
  189. class _Inclusive>
  190. typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
  191. __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
  192. _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
  193. /*is_vector=*/std::true_type) noexcept
  194. {
  195. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
  196. /*is_vector=*/std::false_type());
  197. }
  198. template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp,
  199. class _BinaryOperation, class _Inclusive, class _IsVector>
  200. _OutputIterator
  201. __pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
  202. _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
  203. _Inclusive, _IsVector __is_vector, /*is_parallel=*/std::false_type) noexcept
  204. {
  205. return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), __is_vector)
  206. .first;
  207. }
  #if __PSTL_USE_PAR_POLICIES
  // Parallel pattern of transform_scan for a _Tp that is not a floating-point type
  // (is_parallel == std::true_type).
  //
  // Runs __par_backend::__parallel_transform_scan over the index range [0, __last - __first)
  // inside __internal::__except_handler. The backend receives:
  //   - an index-based transform applying __unary_op,
  //   - the initial value and __binary_op,
  //   - a sub-range reducer over indices [__begin, __end),
  //   - a sub-range scanner that writes to __result + __begin and yields the final running value.
  // Returns the iterator one past the last written output element.
  template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation,
            class _Tp, class _BinaryOperation, class _Inclusive, class _IsVector>
  typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
  __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
                           _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
                           _BinaryOperation __binary_op, _Inclusive, _IsVector __is_vector,
                           /*is_parallel=*/std::true_type)
  {
      using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator>::difference_type;
      return __internal::__except_handler([&]() {
          __par_backend::__parallel_transform_scan(
              std::forward<_ExecutionPolicy>(__exec), __last - __first,
              [__first, __unary_op](_DifferenceType __idx) mutable { return __unary_op(__first[__idx]); }, __init,
              __binary_op,
              [__first, __unary_op, __binary_op](_DifferenceType __begin, _DifferenceType __end, _Tp __partial) {
                  // Execute the serial __brick_transform_reduce, because the explicit SIMD
                  // vectorization (reduction) requires a commutative operation to guarantee a
                  // correct scan.
                  return __internal::__brick_transform_reduce(__first + __begin, __first + __end, __partial,
                                                              __binary_op, __unary_op,
                                                              /*__is_vector*/ std::false_type());
              },
              [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __begin,
                                                                        _DifferenceType __end, _Tp __partial) {
                  return __internal::__brick_transform_scan(__first + __begin, __first + __end, __result + __begin,
                                                            __unary_op, __partial, __binary_op, _Inclusive(),
                                                            __is_vector)
                      .second;
              });
          return __result + (__last - __first);
      });
  }
  #endif
  #if __PSTL_USE_PAR_POLICIES
  // Parallel pattern of transform_scan for a floating-point _Tp (is_parallel == std::true_type).
  //
  // An empty (or negative-length) range returns __result immediately. Otherwise
  // __par_backend::__parallel_strict_scan is run inside __internal::__except_handler with:
  //   - a chunk scanner that scans [__offset, __offset + __len) into __result starting from
  //     _Tp{} and yields the chunk's final running value,
  //   - __binary_op as the combiner,
  //   - a chunk "apply" step that rewrites each already-written output element x of the chunk
  //     as __binary_op(__carry, x) and yields the last element of the chunk,
  //   - a final callback that ignores its argument.
  // NOTE(review): seeding chunks with _Tp{} presumes a value-initialized _Tp behaves as an
  // identity for __binary_op - confirm against the callers' requirements.
  template <class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation,
            class _Tp, class _BinaryOperation, class _Inclusive, class _IsVector>
  typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
  __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last,
                           _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
                           _BinaryOperation __binary_op, _Inclusive, _IsVector __is_vector,
                           /*is_parallel=*/std::true_type)
  {
      using _DifferenceType = typename std::iterator_traits<_RandomAccessIterator>::difference_type;
      _DifferenceType __n = __last - __first;

      if (__n <= 0)
      {
          return __result;
      }

      return __internal::__except_handler([&]() {
          __par_backend::__parallel_strict_scan(
              std::forward<_ExecutionPolicy>(__exec), __n, __init,
              [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __offset,
                                                                        _DifferenceType __len) {
                  return __internal::__brick_transform_scan(__first + __offset, __first + (__offset + __len),
                                                            __result + __offset, __unary_op, _Tp{}, __binary_op,
                                                            _Inclusive(), __is_vector)
                      .second;
              },
              __binary_op,
              [__result, &__binary_op](_DifferenceType __offset, _DifferenceType __len, _Tp __carry) {
                  // In-place update of the chunk; std::transform returns one past the last
                  // written element, so step back by one to read the chunk's last value.
                  auto __chunk_end = std::transform(__result + __offset, __result + __offset + __len,
                                                    __result + __offset, [&__carry, &__binary_op](const _Tp& __val) {
                                                        __PSTL_PRAGMA_FORCEINLINE
                                                        return __binary_op(__carry, __val);
                                                    });
                  return *(__chunk_end - 1);
              },
              [](_Tp) {});
          return __result + (__last - __first);
      });
  }
  #endif
  273. //------------------------------------------------------------------------
  274. // adjacent_difference
  275. //------------------------------------------------------------------------
  // Serial brick of adjacent_difference: a direct forward to std::adjacent_difference with the
  // caller-supplied operation. Returns the iterator produced by the standard algorithm.
  template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
  _OutputIterator
  __brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first,
                              _BinaryOperation __op,
                              /*is_vector*/ std::false_type) noexcept
  {
      return std::adjacent_difference(__first, __last, __d_first, __op);
  }
  283. template <class _ForwardIterator1, class _ForwardIterator2, class BinaryOperation>
  284. _ForwardIterator2
  285. __brick_adjacent_difference(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first,
  286. BinaryOperation __op, /*is_vector=*/std::true_type) noexcept
  287. {
  288. __PSTL_ASSERT(__first != __last);
  289. typedef typename std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
  290. typedef typename std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
  291. auto __n = __last - __first;
  292. *__d_first = *__first;
  293. return __unseq_backend::__simd_walk_3(
  294. __first + 1, __n - 1, __first, __d_first + 1,
  295. [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); });
  296. }
  297. template <class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation,
  298. class _IsVector>
  299. _OutputIterator
  300. __pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
  301. _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector,
  302. /*is_parallel*/ std::false_type) noexcept
  303. {
  304. return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector);
  305. }
  #if __PSTL_USE_PAR_POLICIES
  // Parallel pattern of adjacent_difference (is_parallel == std::true_type).
  //
  // Precondition (asserted): the input range is not empty.
  // The first input element is copied to the first output element. The remaining outputs are
  // produced by __par_backend::__parallel_for over [__first, __last - 1): for each sub-range
  // [__chunk_begin, __chunk_end) __internal::__brick_walk3 pairs every element with its successor
  // and stores __op(successor, element) in the output slot of the successor.
  // Returns __d_first advanced by the input length.
  // NOTE(review): unlike the other parallel patterns in this file, the backend call is not
  // wrapped in __internal::__except_handler - confirm whether that is intentional.
  template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation,
            class _IsVector>
  _ForwardIterator2
  __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
                                _ForwardIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector,
                                /*is_parallel=*/std::true_type)
  {
      __PSTL_ASSERT(__first != __last);

      using _ReferenceType1 = typename std::iterator_traits<_ForwardIterator1>::reference;
      using _ReferenceType2 = typename std::iterator_traits<_ForwardIterator2>::reference;

      *__d_first = *__first;
      __par_backend::__parallel_for(
          std::forward<_ExecutionPolicy>(__exec), __first, __last - 1,
          [&__op, __is_vector, __d_first, __first](_ForwardIterator1 __chunk_begin, _ForwardIterator1 __chunk_end) {
              // Output position matching the start of this chunk.
              _ForwardIterator2 __d_chunk = __d_first + (__chunk_begin - __first);
              __internal::__brick_walk3(
                  __chunk_begin, __chunk_end, __chunk_begin + 1, __d_chunk + 1,
                  [&__op](_ReferenceType1 __prev, _ReferenceType1 __cur, _ReferenceType2 __out) {
                      __out = __op(__cur, __prev);
                  },
                  __is_vector);
          });
      return __d_first + (__last - __first);
  }
  #endif
  330. } // namespace __internal
  331. } // namespace __pstl
  332. #endif /* __PSTL_numeric_impl_H */