libstdc++
simd_fixed_size.h
1 // Simd fixed_size ABI specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /*
26  * The fixed_size ABI gives the following guarantees:
27  * - simd objects are passed via the stack
28  * - memory layout of `simd<_Tp, _Np>` is equivalent to `array<_Tp, _Np>`
29  * - alignment of `simd<_Tp, _Np>` is `_Np * sizeof(_Tp)` if _Np is a
30  * power-of-2 value, otherwise `std::__bit_ceil(_Np * sizeof(_Tp))` (Note:
31  * if the alignment were to exceed the system/compiler maximum, it is bounded
32  * to that maximum)
33  * - simd_mask objects are passed like bitset<_Np>
34  * - memory layout of `simd_mask<_Tp, _Np>` is equivalent to `bitset<_Np>`
35  * - alignment of `simd_mask<_Tp, _Np>` is equal to the alignment of
36  * `bitset<_Np>`
37  */
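// Illustration of the alignment guarantee (sketch; assumes a target whose
// maximum supported alignment is at least 16, `namespace stdx =
// std::experimental`, and <experimental/simd> included):
//   static_assert(alignof(stdx::fixed_size_simd<float, 4>) == 4 * sizeof(float));
//   static_assert(alignof(stdx::fixed_size_simd<float, 3>) == 16); // __bit_ceil(3 * sizeof(float))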
38 
39 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
40 #define _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
41 
42 #if __cplusplus >= 201703L
43 
44 #include <array>
45 
46 _GLIBCXX_SIMD_BEGIN_NAMESPACE
47 
48 // __simd_tuple_element {{{
49 template <size_t _I, typename _Tp>
50  struct __simd_tuple_element;
51 
52 template <typename _Tp, typename _A0, typename... _As>
53  struct __simd_tuple_element<0, _SimdTuple<_Tp, _A0, _As...>>
54  { using type = simd<_Tp, _A0>; };
55 
56 template <size_t _I, typename _Tp, typename _A0, typename... _As>
57  struct __simd_tuple_element<_I, _SimdTuple<_Tp, _A0, _As...>>
58  { using type = typename __simd_tuple_element<_I - 1, _SimdTuple<_Tp, _As...>>::type; };
59 
60 template <size_t _I, typename _Tp>
61  using __simd_tuple_element_t = typename __simd_tuple_element<_I, _Tp>::type;
62 
63 // }}}
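// For example (sketch, with _A0/_A1 standing for arbitrary member ABI tags):
// given _Tup = _SimdTuple<float, _A0, _A1>, __simd_tuple_element_t<0, _Tup> is
// simd<float, _A0> and __simd_tuple_element_t<1, _Tup> is simd<float, _A1>.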
64 // __simd_tuple_concat {{{
65 
66 template <typename _Tp, typename... _A0s, typename... _A1s>
67  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0s..., _A1s...>
68  __simd_tuple_concat(const _SimdTuple<_Tp, _A0s...>& __left,
69  const _SimdTuple<_Tp, _A1s...>& __right)
70  {
71  if constexpr (sizeof...(_A0s) == 0)
72  return __right;
73  else if constexpr (sizeof...(_A1s) == 0)
74  return __left;
75  else
76  return {__left.first, __simd_tuple_concat(__left.second, __right)};
77  }
78 
79 template <typename _Tp, typename _A10, typename... _A1s>
80  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, simd_abi::scalar, _A10, _A1s...>
81  __simd_tuple_concat(const _Tp& __left, const _SimdTuple<_Tp, _A10, _A1s...>& __right)
82  { return {__left, __right}; }
83 
84 // }}}
85 // __simd_tuple_pop_front {{{
86 // Returns the trailing sub-tuple of __x, i.e. __x with its first _Np elements dropped.
87 // Precondition: _Np must equal the combined element count of zero or more leading members of __x.
88 template <size_t _Np, typename _Tp>
89  _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
90  __simd_tuple_pop_front(_Tp&& __x)
91  {
92  if constexpr (_Np == 0)
93  return static_cast<_Tp&&>(__x);
94  else
95  {
96  using _Up = __remove_cvref_t<_Tp>;
97  static_assert(_Np >= _Up::_S_first_size);
98  return __simd_tuple_pop_front<_Np - _Up::_S_first_size>(__x.second);
99  }
100  }
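 // Example (sketch): for a _SimdTuple<float, _A0, _A1, _A2> __x whose first
 // member covers 4 elements, __simd_tuple_pop_front<4>(__x) yields __x.second,
 // i.e. the remaining _SimdTuple<float, _A1, _A2>; __simd_tuple_pop_front<0>(__x)
 // yields __x unchanged.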
101 
102 // }}}
103 // __get_simd_at<_Np> {{{1
104 struct __as_simd {};
105 
106 struct __as_simd_tuple {};
107 
108 template <typename _Tp, typename _A0, typename... _Abis>
109  _GLIBCXX_SIMD_INTRINSIC constexpr simd<_Tp, _A0>
110  __simd_tuple_get_impl(__as_simd, const _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>)
111  { return {__private_init, __t.first}; }
112 
113 template <typename _Tp, typename _A0, typename... _Abis>
114  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
115  __simd_tuple_get_impl(__as_simd_tuple, const _SimdTuple<_Tp, _A0, _Abis...>& __t,
116  _SizeConstant<0>)
117  { return __t.first; }
118 
119 template <typename _Tp, typename _A0, typename... _Abis>
120  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
121  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _A0, _Abis...>& __t, _SizeConstant<0>)
122  { return __t.first; }
123 
124 template <typename _R, size_t _Np, typename _Tp, typename... _Abis>
125  _GLIBCXX_SIMD_INTRINSIC constexpr auto
126  __simd_tuple_get_impl(_R, const _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>)
127  { return __simd_tuple_get_impl(_R(), __t.second, _SizeConstant<_Np - 1>()); }
128 
129 template <size_t _Np, typename _Tp, typename... _Abis>
130  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
131  __simd_tuple_get_impl(__as_simd_tuple, _SimdTuple<_Tp, _Abis...>& __t, _SizeConstant<_Np>)
132  { return __simd_tuple_get_impl(__as_simd_tuple(), __t.second, _SizeConstant<_Np - 1>()); }
133 
134 template <size_t _Np, typename _Tp, typename... _Abis>
135  _GLIBCXX_SIMD_INTRINSIC constexpr auto
136  __get_simd_at(const _SimdTuple<_Tp, _Abis...>& __t)
137  { return __simd_tuple_get_impl(__as_simd(), __t, _SizeConstant<_Np>()); }
138 
139 // }}}
140 // __get_tuple_at<_Np> {{{
141 template <size_t _Np, typename _Tp, typename... _Abis>
142  _GLIBCXX_SIMD_INTRINSIC constexpr auto
143  __get_tuple_at(const _SimdTuple<_Tp, _Abis...>& __t)
144  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }
145 
146 template <size_t _Np, typename _Tp, typename... _Abis>
147  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
148  __get_tuple_at(_SimdTuple<_Tp, _Abis...>& __t)
149  { return __simd_tuple_get_impl(__as_simd_tuple(), __t, _SizeConstant<_Np>()); }
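// Example (sketch): for a _SimdTuple<float, _A0, _A1> __t, __get_simd_at<1>(__t)
// returns a simd<float, _A1> copy of the second chunk, whereas
// __get_tuple_at<1>(__t) yields that chunk's stored _SimdMember (by reference
// for non-const __t).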
150 
151 // __tuple_element_meta {{{1
152 template <typename _Tp, typename _Abi, size_t _Offset>
153  struct __tuple_element_meta : public _Abi::_SimdImpl
154  {
155  static_assert(is_same_v<typename _Abi::_SimdImpl::abi_type,
156  _Abi>); // this fails e.g. when _SimdImpl is an
157  // alias for _SimdImplBuiltin<_DifferentAbi>
158  using value_type = _Tp;
159  using abi_type = _Abi;
160  using _Traits = _SimdTraits<_Tp, _Abi>;
161  using _MaskImpl = typename _Abi::_MaskImpl;
162  using _MaskMember = typename _Traits::_MaskMember;
163  using simd_type = simd<_Tp, _Abi>;
164  static constexpr size_t _S_offset = _Offset;
165  static constexpr size_t _S_size() { return simd_size<_Tp, _Abi>::value; }
166  static constexpr _MaskImpl _S_mask_impl = {};
167 
168  template <size_t _Np, bool _Sanitized>
169  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
170  _S_submask(_BitMask<_Np, _Sanitized> __bits)
171  { return __bits.template _M_extract<_Offset, _S_size()>(); }
172 
173  template <size_t _Np, bool _Sanitized>
174  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
175  _S_make_mask(_BitMask<_Np, _Sanitized> __bits)
176  {
177  return _MaskImpl::template _S_convert<_Tp>(
178  __bits.template _M_extract<_Offset, _S_size()>()._M_sanitized());
179  }
180 
181  _GLIBCXX_SIMD_INTRINSIC static constexpr _ULLong
182  _S_mask_to_shifted_ullong(_MaskMember __k)
183  { return _MaskImpl::_S_to_bits(__k).to_ullong() << _Offset; }
184  };
185 
186 template <size_t _Offset, typename _Tp, typename _Abi, typename... _As>
187  constexpr
188  __tuple_element_meta<_Tp, _Abi, _Offset>
189  __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&)
190  { return {}; }
191 
192 // }}}1
193 // _WithOffset wrapper class {{{
194 template <size_t _Offset, typename _Base>
195  struct _WithOffset : public _Base
196  {
197  static inline constexpr size_t _S_offset = _Offset;
198 
199  _GLIBCXX_SIMD_INTRINSIC char*
200  _M_as_charptr()
201  { return reinterpret_cast<char*>(this) + _S_offset * sizeof(typename _Base::value_type); }
202 
203  _GLIBCXX_SIMD_INTRINSIC const char*
204  _M_as_charptr() const
205  { return reinterpret_cast<const char*>(this) + _S_offset * sizeof(typename _Base::value_type); }
206  };
207 
208 // make nested _WithOffset<_WithOffset<...>> ill-formed to use:
209 template <size_t _O0, size_t _O1, typename _Base>
210  struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {};
211 
212 template <size_t _Offset, typename _Tp>
213  decltype(auto)
214  __add_offset(_Tp& __base)
215  { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }
216 
217 template <size_t _Offset, typename _Tp>
218  decltype(auto)
219  __add_offset(const _Tp& __base)
220  { return static_cast<const _WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }
221 
222 template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
223  decltype(auto)
224  __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base)
225  { return static_cast<_WithOffset<_Offset + _ExistingOffset, _Tp>&>(static_cast<_Tp&>(__base)); }
226 
227 template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
228  decltype(auto)
229  __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base)
230  {
231  return static_cast<const _WithOffset<_Offset + _ExistingOffset, _Tp>&>(
232  static_cast<const _Tp&>(__base));
233  }
234 
235 template <typename _Tp>
236  constexpr inline size_t __offset = 0;
237 
238 template <size_t _Offset, typename _Tp>
239  constexpr inline size_t __offset<_WithOffset<_Offset, _Tp>>
240  = _WithOffset<_Offset, _Tp>::_S_offset;
241 
242 template <typename _Tp>
243  constexpr inline size_t __offset<const _Tp> = __offset<_Tp>;
244 
245 template <typename _Tp>
246  constexpr inline size_t __offset<_Tp&> = __offset<_Tp>;
247 
248 template <typename _Tp>
249  constexpr inline size_t __offset<_Tp&&> = __offset<_Tp>;
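// Example (sketch): __add_offset<2>(__tup) views a plain _SimdTuple __tup as
// _WithOffset<2, decltype(__tup)>; applying __add_offset<3> to that view yields
// a _WithOffset<5, ...> reference, and __offset<decltype(...)> recovers the
// accumulated offset 5.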
250 
251 // }}}
252 // _SimdTuple specializations {{{1
253 // empty {{{2
254 template <typename _Tp>
255  struct _SimdTuple<_Tp>
256  {
257  using value_type = _Tp;
258  static constexpr size_t _S_tuple_size = 0;
259  static constexpr size_t _S_size() { return 0; }
260  };
261 
262 // _SimdTupleData {{{2
263 template <typename _FirstType, typename _SecondType>
264  struct _SimdTupleData
265  {
266  _FirstType first;
267  _SecondType second;
268 
269  _GLIBCXX_SIMD_INTRINSIC
270  constexpr bool
271  _M_is_constprop() const
272  {
273  if constexpr (is_class_v<_FirstType>)
274  return first._M_is_constprop() && second._M_is_constprop();
275  else
276  return __builtin_constant_p(first) && second._M_is_constprop();
277  }
278  };
279 
280 template <typename _FirstType, typename _Tp>
281  struct _SimdTupleData<_FirstType, _SimdTuple<_Tp>>
282  {
283  _FirstType first;
284  static constexpr _SimdTuple<_Tp> second = {};
285 
286  _GLIBCXX_SIMD_INTRINSIC
287  constexpr bool
288  _M_is_constprop() const
289  {
290  if constexpr (is_class_v<_FirstType>)
291  return first._M_is_constprop();
292  else
293  return __builtin_constant_p(first);
294  }
295  };
296 
297 // 1 or more {{{2
298 template <typename _Tp, typename _Abi0, typename... _Abis>
299  struct _SimdTuple<_Tp, _Abi0, _Abis...>
300  : _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
301  _SimdTuple<_Tp, _Abis...>>
302  {
303  static_assert(!__is_fixed_size_abi_v<_Abi0>);
304  using value_type = _Tp;
305  using _FirstType = typename _SimdTraits<_Tp, _Abi0>::_SimdMember;
306  using _FirstAbi = _Abi0;
307  using _SecondType = _SimdTuple<_Tp, _Abis...>;
308  static constexpr size_t _S_tuple_size = sizeof...(_Abis) + 1;
309 
310  static constexpr size_t _S_size()
311  { return simd_size_v<_Tp, _Abi0> + _SecondType::_S_size(); }
312 
313  static constexpr size_t _S_first_size = simd_size_v<_Tp, _Abi0>;
314  static constexpr bool _S_is_homogeneous = (is_same_v<_Abi0, _Abis> && ...);
315 
316  using _Base = _SimdTupleData<typename _SimdTraits<_Tp, _Abi0>::_SimdMember,
317  _SimdTuple<_Tp, _Abis...>>;
318  using _Base::first;
319  using _Base::second;
320 
321  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple() = default;
322  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple(const _SimdTuple&) = default;
323  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple& operator=(const _SimdTuple&)
324  = default;
325 
326  template <typename _Up>
327  _GLIBCXX_SIMD_INTRINSIC constexpr
328  _SimdTuple(_Up&& __x)
329  : _Base{static_cast<_Up&&>(__x)} {}
330 
331  template <typename _Up, typename _Up2>
332  _GLIBCXX_SIMD_INTRINSIC constexpr
333  _SimdTuple(_Up&& __x, _Up2&& __y)
334  : _Base{static_cast<_Up&&>(__x), static_cast<_Up2&&>(__y)} {}
335 
336  template <typename _Up>
337  _GLIBCXX_SIMD_INTRINSIC constexpr
338  _SimdTuple(_Up&& __x, _SimdTuple<_Tp>)
339  : _Base{static_cast<_Up&&>(__x)} {}
340 
341  _GLIBCXX_SIMD_INTRINSIC char*
342  _M_as_charptr()
343  { return reinterpret_cast<char*>(this); }
344 
345  _GLIBCXX_SIMD_INTRINSIC const char*
346  _M_as_charptr() const
347  { return reinterpret_cast<const char*>(this); }
348 
349  template <size_t _Np>
350  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
351  _M_at()
352  {
353  if constexpr (_Np == 0)
354  return first;
355  else
356  return second.template _M_at<_Np - 1>();
357  }
358 
359  template <size_t _Np>
360  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
361  _M_at() const
362  {
363  if constexpr (_Np == 0)
364  return first;
365  else
366  return second.template _M_at<_Np - 1>();
367  }
368 
369  template <size_t _Np>
370  _GLIBCXX_SIMD_INTRINSIC constexpr auto
371  _M_simd_at() const
372  {
373  if constexpr (_Np == 0)
374  return simd<_Tp, _Abi0>(__private_init, first);
375  else
376  return second.template _M_simd_at<_Np - 1>();
377  }
378 
379  template <size_t _Offset = 0, typename _Fp>
380  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple
381  _S_generate(_Fp&& __gen, _SizeConstant<_Offset> = {})
382  {
383  auto&& __first = __gen(__tuple_element_meta<_Tp, _Abi0, _Offset>());
384  if constexpr (_S_tuple_size == 1)
385  return {__first};
386  else
387  return {__first,
388  _SecondType::_S_generate(
389  static_cast<_Fp&&>(__gen),
390  _SizeConstant<_Offset + simd_size_v<_Tp, _Abi0>>())};
391  }
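  // Usage sketch: _S_generate calls __gen once per chunk with a
  // __tuple_element_meta carrying that chunk's ABI and element offset; e.g.
  // _SimdImplFixedSize::_S_broadcast (below) builds its _SimdMember via
  //   _SimdMember<_Tp>::_S_generate([&](auto __meta) { return __meta._S_broadcast(__x); });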
392 
393  template <size_t _Offset = 0, typename _Fp, typename... _More>
394  _GLIBCXX_SIMD_INTRINSIC _SimdTuple
395  _M_apply_wrapped(_Fp&& __fun, const _More&... __more) const
396  {
397  auto&& __first
398  = __fun(__make_meta<_Offset>(*this), first, __more.first...);
399  if constexpr (_S_tuple_size == 1)
400  return {__first};
401  else
402  return {
403  __first,
404  second.template _M_apply_wrapped<_Offset + simd_size_v<_Tp, _Abi0>>(
405  static_cast<_Fp&&>(__fun), __more.second...)};
406  }
407 
408  template <typename _Tup>
409  _GLIBCXX_SIMD_INTRINSIC constexpr decltype(auto)
410  _M_extract_argument(_Tup&& __tup) const
411  {
412  using _TupT = typename __remove_cvref_t<_Tup>::value_type;
413  if constexpr (is_same_v<_SimdTuple, __remove_cvref_t<_Tup>>)
414  return __tup.first;
415  else if (__builtin_is_constant_evaluated())
416  return __fixed_size_storage_t<_TupT, _S_first_size>::_S_generate(
417  [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
418  return __meta._S_generator(
419  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
420  return __tup[__i];
421  }, static_cast<_TupT*>(nullptr));
422  });
423  else
424  return [&]() { // not always_inline; allow the compiler to decide
425  __fixed_size_storage_t<_TupT, _S_first_size> __r;
426  __builtin_memcpy(__r._M_as_charptr(), __tup._M_as_charptr(),
427  sizeof(__r));
428  return __r;
429  }();
430  }
431 
432  template <typename _Tup>
433  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
434  _M_skip_argument(_Tup&& __tup) const
435  {
436  static_assert(_S_tuple_size > 1);
437  using _Up = __remove_cvref_t<_Tup>;
438  constexpr size_t __off = __offset<_Up>;
439  if constexpr (_S_first_size == _Up::_S_first_size && __off == 0)
440  return __tup.second;
441  else if constexpr (_S_first_size > _Up::_S_first_size
442  && _S_first_size % _Up::_S_first_size == 0
443  && __off == 0)
444  return __simd_tuple_pop_front<_S_first_size>(__tup);
445  else if constexpr (_S_first_size + __off < _Up::_S_first_size)
446  return __add_offset<_S_first_size>(__tup);
447  else if constexpr (_S_first_size + __off == _Up::_S_first_size)
448  return __tup.second;
449  else
450  __assert_unreachable<_Tup>();
451  }
452 
453  template <size_t _Offset, typename... _More>
454  _GLIBCXX_SIMD_INTRINSIC constexpr void
455  _M_assign_front(const _SimdTuple<_Tp, _Abi0, _More...>& __x) &
456  {
457  static_assert(_Offset == 0);
458  first = __x.first;
459  if constexpr (sizeof...(_More) > 0)
460  {
461  static_assert(sizeof...(_Abis) >= sizeof...(_More));
462  second.template _M_assign_front<0>(__x.second);
463  }
464  }
465 
466  template <size_t _Offset>
467  _GLIBCXX_SIMD_INTRINSIC constexpr void
468  _M_assign_front(const _FirstType& __x) &
469  {
470  static_assert(_Offset == 0);
471  first = __x;
472  }
473 
474  template <size_t _Offset, typename... _As>
475  _GLIBCXX_SIMD_INTRINSIC constexpr void
476  _M_assign_front(const _SimdTuple<_Tp, _As...>& __x) &
477  {
478  __builtin_memcpy(_M_as_charptr() + _Offset * sizeof(value_type),
479  __x._M_as_charptr(),
480  sizeof(_Tp) * _SimdTuple<_Tp, _As...>::_S_size());
481  }
482 
483  /*
484  * Iterate over the `first` members of this _SimdTuple chain and call __fun
485  * for each of them. Additional arguments passed via __more are split into
486  * _SimdTuple or __vector_type_t chunks covering the same number of values.
487  */
488  template <typename _Fp, typename... _More>
489  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple
490  _M_apply_per_chunk(_Fp&& __fun, _More&&... __more) const
491  {
492  if constexpr ((...
493  || conjunction_v<
494  is_lvalue_reference<_More>,
495  negation<is_const<remove_reference_t<_More>>>>) )
496  {
497  // need to write back at least one of __more after calling __fun
498  auto&& __first = [&](auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
499  auto __r = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
500  __args...);
501  [[maybe_unused]] auto&& __ignore_me = {(
502  [](auto&& __dst, const auto& __src) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
503  if constexpr (is_assignable_v<decltype(__dst),
504  decltype(__dst)>)
505  {
506  __dst.template _M_assign_front<__offset<decltype(__dst)>>(
507  __src);
508  }
509  }(static_cast<_More&&>(__more), __args),
510  0)...};
511  return __r;
512  }(_M_extract_argument(__more)...);
513  if constexpr (_S_tuple_size == 1)
514  return {__first};
515  else
516  return {__first,
517  second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
518  _M_skip_argument(__more)...)};
519  }
520  else
521  {
522  auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
523  _M_extract_argument(__more)...);
524  if constexpr (_S_tuple_size == 1)
525  return {__first};
526  else
527  return {__first,
528  second._M_apply_per_chunk(static_cast<_Fp&&>(__fun),
529  _M_skip_argument(__more)...)};
530  }
531  }
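  // Usage sketch: a binary operation maps over the chunks of two equally shaped
  // tuples via _M_apply_per_chunk, e.g. (cf. _S_min in _SimdImplFixedSize below)
  //   __a._M_apply_per_chunk(
  //     [](auto __impl, auto __aa, auto __bb) { return __impl._S_min(__aa, __bb); },
  //     __b);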
532 
533  template <typename _R = _Tp, typename _Fp, typename... _More>
534  _GLIBCXX_SIMD_INTRINSIC constexpr auto
535  _M_apply_r(_Fp&& __fun, const _More&... __more) const
536  {
537  auto&& __first = __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), first,
538  __more.first...);
539  if constexpr (_S_tuple_size == 1)
540  return __first;
541  else
542  return __simd_tuple_concat<_R>(
543  __first, second.template _M_apply_r<_R>(static_cast<_Fp&&>(__fun),
544  __more.second...));
545  }
546 
547  template <typename _Fp, typename... _More>
548  _GLIBCXX_SIMD_INTRINSIC constexpr friend _SanitizedBitMask<_S_size()>
549  _M_test(const _Fp& __fun, const _SimdTuple& __x, const _More&... __more)
550  {
551  const _SanitizedBitMask<_S_first_size> __first
552  = _Abi0::_MaskImpl::_S_to_bits(
553  __fun(__tuple_element_meta<_Tp, _Abi0, 0>(), __x.first,
554  __more.first...));
555  if constexpr (_S_tuple_size == 1)
556  return __first;
557  else
558  return _M_test(__fun, __x.second, __more.second...)
559  ._M_prepend(__first);
560  }
561 
562  template <typename _Up, _Up _I>
563  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
564  operator[](integral_constant<_Up, _I>) const noexcept
565  {
566  if constexpr (_I < simd_size_v<_Tp, _Abi0>)
567  return _M_subscript_read(_I);
568  else
569  return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()];
570  }
571 
572  constexpr _Tp
573  operator[](size_t __i) const noexcept
574  {
575  if constexpr (_S_tuple_size == 1)
576  return _M_subscript_read(__i);
577 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
578  else if (not __builtin_is_constant_evaluated())
579  return reinterpret_cast<const __may_alias<_Tp>*>(this)[__i];
580 #endif
581  else if constexpr (__is_scalar_abi<_Abi0>())
582  {
583  const _Tp* ptr = &first;
584  return ptr[__i];
585  }
586  else
587  return __i < simd_size_v<_Tp, _Abi0> ? _M_subscript_read(__i)
588  : second[__i - simd_size_v<_Tp, _Abi0>];
589  }
590 
591  constexpr void
592  _M_set(size_t __i, _Tp __val) noexcept
593  {
594  if constexpr (_S_tuple_size == 1)
595  return _M_subscript_write(__i, __val);
596 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
597  else if (not __builtin_is_constant_evaluated())
598  reinterpret_cast<__may_alias<_Tp>*>(this)[__i] = __val;
599 #endif
600  else if (__i < simd_size_v<_Tp, _Abi0>)
601  _M_subscript_write(__i, __val);
602  else
603  second._M_set(__i - simd_size_v<_Tp, _Abi0>, __val);
604  }
605 
606  private:
607  // _M_subscript_read/_write {{{
608  constexpr _Tp
609  _M_subscript_read([[maybe_unused]] size_t __i) const noexcept
610  {
611  if constexpr (__is_vectorizable_v<_FirstType>)
612  return first;
613  else
614  return first[__i];
615  }
616 
617  constexpr void
618  _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept
619  {
620  if constexpr (__is_vectorizable_v<_FirstType>)
621  first = __y;
622  else
623  first._M_set(__i, __y);
624  }
625 
626  // }}}
627  };
628 
629 // __make_simd_tuple {{{1
630 template <typename _Tp, typename _A0>
631  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0>
632  __make_simd_tuple(simd<_Tp, _A0> __x0)
633  { return {__data(__x0)}; }
634 
635 template <typename _Tp, typename _A0, typename... _As>
636  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _As...>
637  __make_simd_tuple(const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
638  { return {__data(__x0), __make_simd_tuple(__xs...)}; }
639 
640 template <typename _Tp, typename _A0>
641  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0>
642  __make_simd_tuple(const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0)
643  { return {__arg0}; }
644 
645 template <typename _Tp, typename _A0, typename _A1, typename... _Abis>
646  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp, _A0, _A1, _Abis...>
647  __make_simd_tuple(
648  const typename _SimdTraits<_Tp, _A0>::_SimdMember& __arg0,
649  const typename _SimdTraits<_Tp, _A1>::_SimdMember& __arg1,
650  const typename _SimdTraits<_Tp, _Abis>::_SimdMember&... __args)
651  { return {__arg0, __make_simd_tuple<_Tp, _A1, _Abis...>(__arg1, __args...)}; }
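// Example (sketch, with _A0/_A1 standing for arbitrary ABI tags):
// __make_simd_tuple(__x0, __x1) for simd<float, _A0> __x0 and simd<float, _A1>
// __x1 returns a _SimdTuple<float, _A0, _A1> holding copies of __data(__x0) and
// __data(__x1).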
652 
653 // __to_simd_tuple {{{1
654 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
655  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
656  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX);
657 
658 template <typename _Tp, size_t _Np,
659  size_t _Offset = 0, // skip this many elements in __from0
660  typename _R = __fixed_size_storage_t<_Tp, _Np>, typename _V0,
661  typename _V0VT = _VectorTraits<_V0>, typename... _VX>
662  _GLIBCXX_SIMD_INTRINSIC _R constexpr __to_simd_tuple(const _V0 __from0, const _VX... __fromX)
663  {
664  static_assert(is_same_v<typename _V0VT::value_type, _Tp>);
665  static_assert(_Offset < _V0VT::_S_full_size);
666  using _R0 = __vector_type_t<_Tp, _R::_S_first_size>;
667  if constexpr (_R::_S_tuple_size == 1)
668  {
669  if constexpr (_Np == 1)
670  return _R{__from0[_Offset]};
671  else if constexpr (_Offset == 0 && _V0VT::_S_full_size >= _Np)
672  return _R{__intrin_bitcast<_R0>(__from0)};
673  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
674  && _V0VT::_S_full_size / 2 >= _Np)
675  return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0))};
676  else if constexpr (_Offset * 4 == _V0VT::_S_full_size
677  && _V0VT::_S_full_size / 4 >= _Np)
678  return _R{__intrin_bitcast<_R0>(__extract_part<1, 4>(__from0))};
679  else
680  __assert_unreachable<_Tp>();
681  }
682  else
683  {
684  if constexpr (1 == _R::_S_first_size)
685  { // extract one scalar and recurse
686  if constexpr (_Offset + 1 < _V0VT::_S_full_size)
687  return _R{__from0[_Offset],
688  __to_simd_tuple<_Tp, _Np - 1, _Offset + 1>(__from0,
689  __fromX...)};
690  else
691  return _R{__from0[_Offset],
692  __to_simd_tuple<_Tp, _Np - 1, 0>(__fromX...)};
693  }
694 
695  // place __from0 into _R::first and recurse for __fromX -> _R::second
696  else if constexpr (_V0VT::_S_full_size == _R::_S_first_size
697  && _Offset == 0)
698  return _R{__from0,
699  __to_simd_tuple<_Tp, _Np - _R::_S_first_size>(__fromX...)};
700 
701  // place lower part of __from0 into _R::first and recurse with _Offset
702  else if constexpr (_V0VT::_S_full_size > _R::_S_first_size
703  && _Offset == 0)
704  return _R{__intrin_bitcast<_R0>(__from0),
705  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
706  _R::_S_first_size>(__from0, __fromX...)};
707 
708  // place lower part of second quarter of __from0 into _R::first and
709  // recurse with _Offset
710  else if constexpr (_Offset * 4 == _V0VT::_S_full_size
711  && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
712  return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
713  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
714  _Offset + _R::_S_first_size>(__from0,
715  __fromX...)};
716 
717  // place lower half of high half of __from0 into _R::first and recurse
718  // with _Offset
719  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
720  && _V0VT::_S_full_size >= 4 * _R::_S_first_size)
721  return _R{__intrin_bitcast<_R0>(__extract_part<2, 4>(__from0)),
722  __to_simd_tuple<_Tp, _Np - _R::_S_first_size,
723  _Offset + _R::_S_first_size>(__from0,
724  __fromX...)};
725 
726  // place high half of __from0 into _R::first and recurse with __fromX
727  else if constexpr (_Offset * 2 == _V0VT::_S_full_size
728  && _V0VT::_S_full_size / 2 >= _R::_S_first_size)
729  return _R{__intrin_bitcast<_R0>(__extract_part<1, 2>(__from0)),
730  __to_simd_tuple<_Tp, _Np - _R::_S_first_size, 0>(
731  __fromX...)};
732 
733 // ill-formed if some unforeseen pattern is needed
734  else
735  __assert_unreachable<_Tp>();
736  }
737  }
738 
739 template <typename _Tp, size_t _Np, typename _V, size_t _NV, typename... _VX>
740  _GLIBCXX_SIMD_INTRINSIC constexpr __fixed_size_storage_t<_Tp, _Np>
741  __to_simd_tuple(const array<_V, _NV>& __from, const _VX... __fromX)
742  {
743  if constexpr (is_same_v<_Tp, _V>)
744  {
745  static_assert(
746  sizeof...(_VX) == 0,
747  "An array of scalars must be the last argument to __to_simd_tuple");
748  return __call_with_subscripts(
749  __from, make_index_sequence<_NV>(),
750  [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
751  return __simd_tuple_concat(
752  _SimdTuple<_Tp, simd_abi::scalar>{__args}..., _SimdTuple<_Tp>());
753  });
754  }
755  else
756  return __call_with_subscripts(
757  __from, make_index_sequence<_NV>(),
758  [&](const auto... __args) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
759  return __to_simd_tuple<_Tp, _Np>(__args..., __fromX...);
760  });
761  }
762 
763 template <size_t, typename _Tp>
764  using __to_tuple_helper = _Tp;
765 
766 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
767  size_t... _Indexes>
768  _GLIBCXX_SIMD_INTRINSIC __fixed_size_storage_t<_Tp, _NOut>
769  __to_simd_tuple_impl(index_sequence<_Indexes...>,
770  const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args)
771  {
772  return __make_simd_tuple<_Tp, __to_tuple_helper<_Indexes, _A0>...>(
773  __args[_Indexes]...);
774  }
775 
776 template <typename _Tp, typename _A0, size_t _NOut, size_t _Np,
777  typename _R = __fixed_size_storage_t<_Tp, _NOut>>
778  _GLIBCXX_SIMD_INTRINSIC _R
779  __to_simd_tuple_sized(
780  const array<__vector_type_t<_Tp, simd_size_v<_Tp, _A0>>, _Np>& __args)
781  {
782  static_assert(_Np * simd_size_v<_Tp, _A0> >= _NOut);
783  return __to_simd_tuple_impl<_Tp, _A0, _NOut>(
784  make_index_sequence<_R::_S_tuple_size>(), __args);
785  }
786 
787 // __optimize_simd_tuple {{{1
788 template <typename _Tp>
789  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdTuple<_Tp>
790  __optimize_simd_tuple(const _SimdTuple<_Tp>)
791  { return {}; }
792 
793 template <typename _Tp, typename _Ap>
794  _GLIBCXX_SIMD_INTRINSIC constexpr const _SimdTuple<_Tp, _Ap>&
795  __optimize_simd_tuple(const _SimdTuple<_Tp, _Ap>& __x)
796  { return __x; }
797 
798 template <typename _Tp, typename _A0, typename _A1, typename... _Abis,
799  typename _R = __fixed_size_storage_t<
800  _Tp, _SimdTuple<_Tp, _A0, _A1, _Abis...>::_S_size()>>
801  _GLIBCXX_SIMD_INTRINSIC constexpr _R
802  __optimize_simd_tuple(const _SimdTuple<_Tp, _A0, _A1, _Abis...>& __x)
803  {
804  using _Tup = _SimdTuple<_Tp, _A0, _A1, _Abis...>;
805  if constexpr (is_same_v<_R, _Tup>)
806  return __x;
807  else if constexpr (is_same_v<typename _R::_FirstType,
808  typename _Tup::_FirstType>)
809  return {__x.first, __optimize_simd_tuple(__x.second)};
810  else if constexpr (__is_scalar_abi<_A0>()
811  || _A0::template _S_is_partial<_Tp>)
812  return {__generate_from_n_evaluations<_R::_S_first_size,
813  typename _R::_FirstType>(
814  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }),
815  __optimize_simd_tuple(
816  __simd_tuple_pop_front<_R::_S_first_size>(__x))};
817  else if constexpr (is_same_v<_A0, _A1>
818  && _R::_S_first_size == simd_size_v<_Tp, _A0> + simd_size_v<_Tp, _A1>)
819  return {__concat(__x.template _M_at<0>(), __x.template _M_at<1>()),
820  __optimize_simd_tuple(__x.second.second)};
821  else if constexpr (sizeof...(_Abis) >= 2
822  && _R::_S_first_size == (4 * simd_size_v<_Tp, _A0>)
823  && simd_size_v<_Tp, _A0> == __simd_tuple_element_t<
824  (sizeof...(_Abis) >= 2 ? 3 : 0), _Tup>::size())
825  return {
826  __concat(__concat(__x.template _M_at<0>(), __x.template _M_at<1>()),
827  __concat(__x.template _M_at<2>(), __x.template _M_at<3>())),
828  __optimize_simd_tuple(__x.second.second.second.second)};
829  else
830  {
831  static_assert(sizeof(_R) == sizeof(__x));
832  _R __r;
833  __builtin_memcpy(__r._M_as_charptr(), __x._M_as_charptr(),
834  sizeof(_Tp) * _R::_S_size());
835  return __r;
836  }
837  }
838 
839 // __for_each(const _SimdTuple &, Fun) {{{1
840 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
841  _GLIBCXX_SIMD_INTRINSIC constexpr void
842  __for_each(const _SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
843  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }
844 
845 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
846  typename... _As, typename _Fp>
847  _GLIBCXX_SIMD_INTRINSIC constexpr void
848  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
849  {
850  __fun(__make_meta<_Offset>(__t), __t.first);
851  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
852  static_cast<_Fp&&>(__fun));
853  }
854 
855 // __for_each(_SimdTuple &, Fun) {{{1
856 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
857  _GLIBCXX_SIMD_INTRINSIC constexpr void
858  __for_each(_SimdTuple<_Tp, _A0>& __t, _Fp&& __fun)
859  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__t), __t.first); }
860 
861 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
862  typename... _As, typename _Fp>
863  _GLIBCXX_SIMD_INTRINSIC constexpr void
864  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __t, _Fp&& __fun)
865  {
866  __fun(__make_meta<_Offset>(__t), __t.first);
867  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__t.second,
868  static_cast<_Fp&&>(__fun));
869  }
870 
871 // __for_each(_SimdTuple &, const _SimdTuple &, Fun) {{{1
872 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
873  _GLIBCXX_SIMD_INTRINSIC constexpr void
874  __for_each(_SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun)
875  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }
876 
877 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
878  typename... _As, typename _Fp>
879  _GLIBCXX_SIMD_INTRINSIC constexpr void
880  __for_each(_SimdTuple<_Tp, _A0, _A1, _As...>& __a,
881  const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
882  {
883  __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
884  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
885  static_cast<_Fp&&>(__fun));
886  }
887 
888 // __for_each(const _SimdTuple &, const _SimdTuple &, Fun) {{{1
889 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _Fp>
890  _GLIBCXX_SIMD_INTRINSIC constexpr void
891  __for_each(const _SimdTuple<_Tp, _A0>& __a, const _SimdTuple<_Tp, _A0>& __b, _Fp&& __fun)
892  { static_cast<_Fp&&>(__fun)(__make_meta<_Offset>(__a), __a.first, __b.first); }
893 
894 template <size_t _Offset = 0, typename _Tp, typename _A0, typename _A1,
895  typename... _As, typename _Fp>
896  _GLIBCXX_SIMD_INTRINSIC constexpr void
897  __for_each(const _SimdTuple<_Tp, _A0, _A1, _As...>& __a,
898  const _SimdTuple<_Tp, _A0, _A1, _As...>& __b, _Fp&& __fun)
899  {
900  __fun(__make_meta<_Offset>(__a), __a.first, __b.first);
901  __for_each<_Offset + simd_size<_Tp, _A0>::value>(__a.second, __b.second,
902  static_cast<_Fp&&>(__fun));
903  }
904 
905 // }}}1
906 // __extract_part(_SimdTuple) {{{
907 template <int _Index, int _Total, int _Combine, typename _Tp, typename _A0, typename... _As>
908  _GLIBCXX_SIMD_INTRINSIC constexpr auto // __vector_type_t or _SimdTuple
909  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x)
910  {
911  // worst cases (sizes of the input tuple's chunks => sizes of the requested parts):
912  // (a) 4, 4, 4 => 3, 3, 3, 3 (_Total = 4)
913  // (b) 2, 2, 2 => 3, 3 (_Total = 2)
914  // (c) 4, 2 => 2, 2, 2 (_Total = 3)
915  using _Tuple = _SimdTuple<_Tp, _A0, _As...>;
916  static_assert(_Index + _Combine <= _Total && _Index >= 0 && _Total >= 1);
917  constexpr size_t _Np = _Tuple::_S_size();
918  static_assert(_Np >= _Total && _Np % _Total == 0);
919  constexpr size_t __values_per_part = _Np / _Total;
920  [[maybe_unused]] constexpr size_t __values_to_skip
921  = _Index * __values_per_part;
922  constexpr size_t __return_size = __values_per_part * _Combine;
923  using _RetAbi = simd_abi::deduce_t<_Tp, __return_size>;
924 
925  // handle (optimize) the simple cases
926  if constexpr (_Index == 0 && _Tuple::_S_first_size == __return_size)
927  return __x.first._M_data;
928  else if constexpr (_Index == 0 && _Total == _Combine)
929  return __x;
930  else if constexpr (_Index == 0 && _Tuple::_S_first_size >= __return_size)
931  return __intrin_bitcast<__vector_type_t<_Tp, __return_size>>(
932  __as_vector(__x.first));
933 
934  // recurse to skip unused data members at the beginning of _SimdTuple
935  else if constexpr (__values_to_skip >= _Tuple::_S_first_size)
936  { // recurse
937  if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
938  {
939  constexpr int __parts_in_first
940  = _Tuple::_S_first_size / __values_per_part;
941  return __extract_part<_Index - __parts_in_first,
942  _Total - __parts_in_first, _Combine>(
943  __x.second);
944  }
945  else
946  return __extract_part<__values_to_skip - _Tuple::_S_first_size,
947  _Np - _Tuple::_S_first_size, __return_size>(
948  __x.second);
949  }
950 
951  // extract from multiple _SimdTuple data members
952  else if constexpr (__return_size > _Tuple::_S_first_size - __values_to_skip)
953  {
954 #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
955  const __may_alias<_Tp>* const element_ptr
956  = reinterpret_cast<const __may_alias<_Tp>*>(&__x) + __values_to_skip;
957  return __as_vector(simd<_Tp, _RetAbi>(element_ptr, element_aligned));
958 #else
959  [[maybe_unused]] constexpr size_t __offset = __values_to_skip;
960  return __as_vector(simd<_Tp, _RetAbi>(
961  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
962  constexpr _SizeConstant<__i + __offset> __k;
963  return __x[__k];
964  }));
965 #endif
966  }
967 
968  // all of the return values are in __x.first
969  else if constexpr (_Tuple::_S_first_size % __values_per_part == 0)
970  return __extract_part<_Index, _Tuple::_S_first_size / __values_per_part,
971  _Combine>(__x.first);
972  else
973  return __extract_part<__values_to_skip, _Tuple::_S_first_size,
974  _Combine * __values_per_part>(__x.first);
975  }
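// Example (sketch): on a 16-element _SimdTuple __x, __extract_part<1, 4>(__x)
// (i.e. _Combine = 1) returns the second quarter, elements 4..7
// (__values_per_part = 4); __extract_part<1, 4, 2>(__x) combines two adjacent
// parts and returns elements 4..11.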
976 
977 // }}}
978 // __fixed_size_storage_t<_Tp, _Np>{{{
979 template <typename _Tp, int _Np, typename _Tuple,
980  typename _Next = simd<_Tp, _AllNativeAbis::_BestAbi<_Tp, _Np>>,
981  int _Remain = _Np - int(_Next::size())>
982  struct __fixed_size_storage_builder;
983 
984 template <typename _Tp, int _Np>
985  struct __fixed_size_storage
986  : public __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp>> {};
987 
988 template <typename _Tp, int _Np, typename... _As, typename _Next>
989  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
990  0>
991  { using type = _SimdTuple<_Tp, _As..., typename _Next::abi_type>; };
992 
993 template <typename _Tp, int _Np, typename... _As, typename _Next, int _Remain>
994  struct __fixed_size_storage_builder<_Tp, _Np, _SimdTuple<_Tp, _As...>, _Next,
995  _Remain>
996  {
997  using type = typename __fixed_size_storage_builder<
998  _Tp, _Remain, _SimdTuple<_Tp, _As..., typename _Next::abi_type>>::type;
999  };
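// Example (sketch, target dependent): for _Np = 12 and a target whose best
// native float ABI covers 8 elements and the next best 4 (hypothetical _Abi8
// and _Abi4 tags), the builder yields
// __fixed_size_storage_t<float, 12> = _SimdTuple<float, _Abi8, _Abi4>.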
1000 
1001 // }}}
1002 // _AbisInSimdTuple {{{
1003 template <typename _Tp>
1004  struct _SeqOp;
1005 
1006 template <size_t _I0, size_t... _Is>
1007  struct _SeqOp<index_sequence<_I0, _Is...>>
1008  {
1009  using _FirstPlusOne = index_sequence<_I0 + 1, _Is...>;
1010  using _NotFirstPlusOne = index_sequence<_I0, (_Is + 1)...>;
1011  template <size_t _First, size_t _Add>
1012  using _Prepend = index_sequence<_First, _I0 + _Add, (_Is + _Add)...>;
1013  };
1014 
1015 template <typename _Tp>
1016  struct _AbisInSimdTuple;
1017 
1018 template <typename _Tp>
1019  struct _AbisInSimdTuple<_SimdTuple<_Tp>>
1020  {
1021  using _Counts = index_sequence<0>;
1022  using _Begins = index_sequence<0>;
1023  };
1024 
1025 template <typename _Tp, typename _Ap>
1026  struct _AbisInSimdTuple<_SimdTuple<_Tp, _Ap>>
1027  {
1028  using _Counts = index_sequence<1>;
1029  using _Begins = index_sequence<0>;
1030  };
1031 
1032 template <typename _Tp, typename _A0, typename... _As>
1033  struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A0, _As...>>
1034  {
1035  using _Counts = typename _SeqOp<typename _AbisInSimdTuple<
1036  _SimdTuple<_Tp, _A0, _As...>>::_Counts>::_FirstPlusOne;
1037  using _Begins = typename _SeqOp<typename _AbisInSimdTuple<
1038  _SimdTuple<_Tp, _A0, _As...>>::_Begins>::_NotFirstPlusOne;
1039  };
1040 
1041 template <typename _Tp, typename _A0, typename _A1, typename... _As>
1042  struct _AbisInSimdTuple<_SimdTuple<_Tp, _A0, _A1, _As...>>
1043  {
1044  using _Counts = typename _SeqOp<typename _AbisInSimdTuple<
1045  _SimdTuple<_Tp, _A1, _As...>>::_Counts>::template _Prepend<1, 0>;
1046  using _Begins = typename _SeqOp<typename _AbisInSimdTuple<
1047  _SimdTuple<_Tp, _A1, _As...>>::_Begins>::template _Prepend<0, 1>;
1048  };
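// Example (sketch): for _SimdTuple<float, _A, _A, _B> the recursion yields
// _Counts = index_sequence<2, 1> and _Begins = index_sequence<0, 2>, i.e. two
// consecutive chunks with ABI _A starting at member index 0, followed by one
// chunk with ABI _B starting at member index 2.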
1049 
1050 // }}}
1051 // __autocvt_to_simd {{{
1052 template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
1053  struct __autocvt_to_simd
1054  {
1055  _Tp _M_data;
1056  using _TT = __remove_cvref_t<_Tp>;
1057 
1058  constexpr
1059  operator _TT()
1060  { return _M_data; }
1061 
1062  constexpr
1063  operator _TT&()
1064  {
1065  static_assert(is_lvalue_reference<_Tp>::value, "");
1066  static_assert(!is_const<_Tp>::value, "");
1067  return _M_data;
1068  }
1069 
1070  constexpr
1071  operator _TT*()
1072  {
1073  static_assert(is_lvalue_reference<_Tp>::value, "");
1074  static_assert(!is_const<_Tp>::value, "");
1075  return &_M_data;
1076  }
1077 
1078  constexpr inline
1079  __autocvt_to_simd(_Tp dd) : _M_data(dd) {}
1080 
1081  template <typename _Abi>
1082  constexpr
1083  operator simd<typename _TT::value_type, _Abi>()
1084  { return {__private_init, _M_data}; }
1085 
1086  template <typename _Abi>
1087  constexpr
1088  operator simd<typename _TT::value_type, _Abi>&()
1089  { return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); }
1090 
1091  template <typename _Abi>
1092  constexpr
1093  operator simd<typename _TT::value_type, _Abi>*()
1094  { return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(&_M_data); }
1095  };
1096 
1097 template <typename _Tp>
1098  __autocvt_to_simd(_Tp &&) -> __autocvt_to_simd<_Tp>;
1099 
1100 template <typename _Tp>
1101  struct __autocvt_to_simd<_Tp, true>
1102  {
1103  using _TT = __remove_cvref_t<_Tp>;
1104  _Tp _M_data;
1105  fixed_size_simd<_TT, 1> _M_fd;
1106 
1107  constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {}
1108 
1109  ~__autocvt_to_simd()
1110  { _M_data = __data(_M_fd).first; }
1111 
1112  constexpr
1113  operator fixed_size_simd<_TT, 1>()
1114  { return _M_fd; }
1115 
1116  constexpr
1117  operator fixed_size_simd<_TT, 1> &()
1118  {
1119  static_assert(is_lvalue_reference<_Tp>::value, "");
1120  static_assert(!is_const<_Tp>::value, "");
1121  return _M_fd;
1122  }
1123 
1124  constexpr
1125  operator fixed_size_simd<_TT, 1> *()
1126  {
1127  static_assert(is_lvalue_reference<_Tp>::value, "");
1128  static_assert(!is_const<_Tp>::value, "");
1129  return &_M_fd;
1130  }
1131  };
1132 
1133 // }}}
1134 
1135 struct _CommonImplFixedSize;
1136 template <int _Np> struct _SimdImplFixedSize;
1137 template <int _Np> struct _MaskImplFixedSize;
1138 // simd_abi::_Fixed {{{
1139 template <int _Np>
1140  struct simd_abi::_Fixed
1141  {
1142  template <typename _Tp> static constexpr size_t _S_size = _Np;
1143  template <typename _Tp> static constexpr size_t _S_full_size = _Np;
1144  // validity traits {{{
1145  struct _IsValidAbiTag : public __bool_constant<(_Np > 0)> {};
1146 
1147  template <typename _Tp>
1148  struct _IsValidSizeFor
1149  : __bool_constant<(_Np <= simd_abi::max_fixed_size<_Tp>)> {};
1150 
1151  template <typename _Tp>
1152  struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
1153  _IsValidSizeFor<_Tp>> {};
1154 
1155  template <typename _Tp>
1156  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
1157 
1158  // }}}
1159  // _S_masked {{{
1160  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1161  _S_masked(_BitMask<_Np> __x)
1162  { return __x._M_sanitized(); }
1163 
1164  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1165  _S_masked(_SanitizedBitMask<_Np> __x)
1166  { return __x; }
1167 
1168  // }}}
1169  // _*Impl {{{
1170  using _CommonImpl = _CommonImplFixedSize;
1171  using _SimdImpl = _SimdImplFixedSize<_Np>;
1172  using _MaskImpl = _MaskImplFixedSize<_Np>;
1173 
1174  // }}}
1175  // __traits {{{
1176  template <typename _Tp, bool = _S_is_valid_v<_Tp>>
1177  struct __traits : _InvalidTraits {};
1178 
1179  template <typename _Tp>
1180  struct __traits<_Tp, true>
1181  {
1182  using _IsValid = true_type;
1183  using _SimdImpl = _SimdImplFixedSize<_Np>;
1184  using _MaskImpl = _MaskImplFixedSize<_Np>;
1185 
1186  // simd and simd_mask member types {{{
1187  using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;
1188  using _MaskMember = _SanitizedBitMask<_Np>;
1189 
1190  static constexpr size_t _S_simd_align
1191  = std::__bit_ceil(_Np * sizeof(_Tp));
1192 
1193  static constexpr size_t _S_mask_align = alignof(_MaskMember);
1194 
1195  // }}}
1196  // _SimdBase / base class for simd, providing extra conversions {{{
1197  struct _SimdBase
1198  {
1199  // The following ensures that function arguments are passed via the stack.
1200  // This is important for ABI compatibility across TU boundaries.
1201  constexpr
1202  _SimdBase(const _SimdBase&) {}
1203 
1204  _SimdBase() = default;
1205 
1206  constexpr explicit
1207  operator const _SimdMember &() const
1208  { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; }
1209 
1210  constexpr explicit
1211  operator array<_Tp, _Np>() const
1212  {
1213  array<_Tp, _Np> __r;
1214  // _SimdMember can be larger because of higher alignment
1215  static_assert(sizeof(__r) <= sizeof(_SimdMember), "");
1216  __builtin_memcpy(__r.data(), &static_cast<const _SimdMember&>(*this),
1217  sizeof(__r));
1218  return __r;
1219  }
1220  };
1221 
1222  // }}}
1223  // _MaskBase {{{
1224  // empty. The bitset interface suffices
1225  struct _MaskBase {};
1226 
1227  // }}}
1228  // _SimdCastType {{{
1229  struct _SimdCastType
1230  {
1231  constexpr
1232  _SimdCastType(const array<_Tp, _Np>&);
1233 
1234  constexpr
1235  _SimdCastType(const _SimdMember& dd) : _M_data(dd) {}
1236 
1237  constexpr explicit
1238  operator const _SimdMember &() const { return _M_data; }
1239 
1240  private:
1241  const _SimdMember& _M_data;
1242  };
1243 
1244  // }}}
1245  // _MaskCastType {{{
1246  class _MaskCastType
1247  {
1248  _MaskCastType() = delete;
1249  };
1250  // }}}
1251  };
1252  // }}}
1253  };
1254 
1255 // }}}
1256 // _CommonImplFixedSize {{{
1257 struct _CommonImplFixedSize
1258 {
1259  // _S_store {{{
1260  template <typename _Tp, typename... _As>
1261  _GLIBCXX_SIMD_INTRINSIC static void
1262  _S_store(const _SimdTuple<_Tp, _As...>& __x, void* __addr)
1263  {
1264  constexpr size_t _Np = _SimdTuple<_Tp, _As...>::_S_size();
1265  __builtin_memcpy(__addr, &__x, _Np * sizeof(_Tp));
1266  }
1267 
1268  // }}}
1269 };
1270 
1271 // }}}
1272 // _SimdImplFixedSize {{{1
1273 // fixed_size should not inherit from _SimdMathFallback, so that the math
1274 // specializations of the ABIs used inside the _SimdTuple take effect
1275 template <int _Np>
1276  struct _SimdImplFixedSize
1277  {
1278  // member types {{{2
1279  using _MaskMember = _SanitizedBitMask<_Np>;
1280 
1281  template <typename _Tp>
1282  using _SimdMember = __fixed_size_storage_t<_Tp, _Np>;
1283 
1284  template <typename _Tp>
1285  static constexpr size_t _S_tuple_size = _SimdMember<_Tp>::_S_tuple_size;
1286 
1287  template <typename _Tp>
1288  using _Simd = simd<_Tp, simd_abi::fixed_size<_Np>>;
1289 
1290  template <typename _Tp>
1291  using _TypeTag = _Tp*;
1292 
1293  // broadcast {{{2
1294  template <typename _Tp>
1295  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1296  _S_broadcast(_Tp __x) noexcept
1297  {
1298  return _SimdMember<_Tp>::_S_generate(
1299  [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1300  return __meta._S_broadcast(__x);
1301  });
1302  }
1303 
1304  // _S_generator {{{2
1305  template <typename _Fp, typename _Tp>
1306  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1307  _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
1308  {
1309  return _SimdMember<_Tp>::_S_generate(
1310  [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1311  return __meta._S_generator(
1312  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1313  return __i < _Np ? __gen(_SizeConstant<__meta._S_offset + __i>())
1314  : 0;
1315  },
1316  _TypeTag<_Tp>());
1317  });
1318  }
1319 
1320  // _S_load {{{2
1321  template <typename _Tp, typename _Up>
1322  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1323  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
1324  {
1325  return _SimdMember<_Tp>::_S_generate(
1326  [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1327  return __meta._S_load(&__mem[__meta._S_offset], _TypeTag<_Tp>());
1328  });
1329  }
1330 
1331  // _S_masked_load {{{2
1332  template <typename _Tp, typename... _As, typename _Up>
1333  _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...>
1334  _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
1335  const _MaskMember __bits, const _Up* __mem) noexcept
1336  {
1337  auto __merge = __old;
1338  __for_each(__merge, [&](auto __meta, auto& __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1339  if (__meta._S_submask(__bits).any())
1340 #pragma GCC diagnostic push
1341  // Dereferencing __mem + __meta._S_offset could be UB ([expr.add]/4.3).
1342  // It is the responsibility of the caller of the masked load (via the mask's value) to
1343  // avoid UB. Consequently, the compiler may assume this branch is unreachable if the
1344  // pointer arithmetic is UB.
1345 #pragma GCC diagnostic ignored "-Warray-bounds"
1346  __native
1347  = __meta._S_masked_load(__native, __meta._S_make_mask(__bits),
1348  __mem + __meta._S_offset);
1349 #pragma GCC diagnostic pop
1350  });
1351  return __merge;
1352  }
1353 
1354  // _S_store {{{2
1355  template <typename _Tp, typename _Up>
1356  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1357  _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept
1358  {
1359  __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1360  __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
1361  });
1362  }
1363 
1364  // _S_masked_store {{{2
1365  template <typename _Tp, typename... _As, typename _Up>
1366  _GLIBCXX_SIMD_INTRINSIC static void
1367  _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem,
1368  const _MaskMember __bits) noexcept
1369  {
1370  __for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1371  if (__meta._S_submask(__bits).any())
1372 #pragma GCC diagnostic push
1373  // Computing __mem + __meta._S_offset could be UB ([expr.add]/4.3), but the
1374  // responsibility for avoiding UB lies with the caller of the masked
1375  // store (via the mask's value). Consequently, the compiler may assume this
1376  // branch is unreachable if the pointer arithmetic is UB.
1377 #pragma GCC diagnostic ignored "-Warray-bounds"
1378  __meta._S_masked_store(__native, __mem + __meta._S_offset,
1379  __meta._S_make_mask(__bits));
1380 #pragma GCC diagnostic pop
1381  });
1382  }
1383 
1384  // negation {{{2
1385  template <typename _Tp, typename... _As>
1386  static constexpr inline _MaskMember
1387  _S_negate(const _SimdTuple<_Tp, _As...>& __x) noexcept
1388  {
1389  _MaskMember __bits = 0;
1390  __for_each(
1391  __x, [&__bits](auto __meta, auto __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1392  __bits
1393  |= __meta._S_mask_to_shifted_ullong(__meta._S_negate(__native));
1394  });
1395  return __bits;
1396  }
1397 
1398  // reductions {{{2
1399  template <typename _Tp, typename _BinaryOperation>
1400  static constexpr inline _Tp _S_reduce(const _Simd<_Tp>& __x,
1401  const _BinaryOperation& __binary_op)
1402  {
1403  using _Tup = _SimdMember<_Tp>;
1404  const _Tup& __tup = __data(__x);
1405  if constexpr (_Tup::_S_tuple_size == 1)
1406  return _Tup::_FirstAbi::_SimdImpl::_S_reduce(
1407  __tup.template _M_simd_at<0>(), __binary_op);
1408  else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 2
1409  && _Tup::_SecondType::_S_size() == 1)
1410  {
1411  return __binary_op(simd<_Tp, simd_abi::scalar>(
1412  reduce(__tup.template _M_simd_at<0>(),
1413  __binary_op)),
1414  __tup.template _M_simd_at<1>())[0];
1415  }
1416  else if constexpr (_Tup::_S_tuple_size == 2 && _Tup::_S_size() > 4
1417  && _Tup::_SecondType::_S_size() == 2)
1418  {
1419  return __binary_op(
1420  simd<_Tp, simd_abi::scalar>(
1421  reduce(__tup.template _M_simd_at<0>(), __binary_op)),
1422  simd<_Tp, simd_abi::scalar>(
1423  reduce(__tup.template _M_simd_at<1>(), __binary_op)))[0];
1424  }
1425  else
1426  {
1427  const auto& __x2 = __call_with_n_evaluations<
1428  __div_roundup(_Tup::_S_tuple_size, 2)>(
1429  [](auto __first_simd, auto... __remaining) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1430  if constexpr (sizeof...(__remaining) == 0)
1431  return __first_simd;
1432  else
1433  {
1434  using _Tup2
1435  = _SimdTuple<_Tp,
1436  typename decltype(__first_simd)::abi_type,
1437  typename decltype(__remaining)::abi_type...>;
1438  return fixed_size_simd<_Tp, _Tup2::_S_size()>(
1439  __private_init,
1440  __make_simd_tuple(__first_simd, __remaining...));
1441  }
1442  },
1443  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1444  auto __left = __tup.template _M_simd_at<2 * __i>();
1445  if constexpr (2 * __i + 1 == _Tup::_S_tuple_size)
1446  return __left;
1447  else
1448  {
1449  auto __right = __tup.template _M_simd_at<2 * __i + 1>();
1450  using _LT = decltype(__left);
1451  using _RT = decltype(__right);
1452  if constexpr (_LT::size() == _RT::size())
1453  return __binary_op(__left, __right);
1454  else
1455  {
1456  _GLIBCXX_SIMD_USE_CONSTEXPR_API
1457  typename _LT::mask_type __k(
1458  __private_init,
1459  [](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1460  return __j < _RT::size();
1461  });
1462  _LT __ext_right = __left;
1463  where(__k, __ext_right)
1464  = __proposed::resizing_simd_cast<_LT>(__right);
1465  where(__k, __left) = __binary_op(__left, __ext_right);
1466  return __left;
1467  }
1468  }
1469  });
1470  return reduce(__x2, __binary_op);
1471  }
1472  }
1473 
1474  // _S_min, _S_max {{{2
1475  template <typename _Tp, typename... _As>
1476  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1477  _S_min(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b)
1478  {
1479  return __a._M_apply_per_chunk(
1480  [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1481  return __impl._S_min(__aa, __bb);
1482  },
1483  __b);
1484  }
1485 
1486  template <typename _Tp, typename... _As>
1487  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1488  _S_max(const _SimdTuple<_Tp, _As...>& __a, const _SimdTuple<_Tp, _As...>& __b)
1489  {
1490  return __a._M_apply_per_chunk(
1491  [](auto __impl, auto __aa, auto __bb) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1492  return __impl._S_max(__aa, __bb);
1493  },
1494  __b);
1495  }
1496 
1497  // _S_complement {{{2
1498  template <typename _Tp, typename... _As>
1499  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1500  _S_complement(const _SimdTuple<_Tp, _As...>& __x) noexcept
1501  {
1502  return __x._M_apply_per_chunk(
1503  [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1504  return __impl._S_complement(__xx);
1505  });
1506  }
1507 
1508  // _S_unary_minus {{{2
1509  template <typename _Tp, typename... _As>
1510  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1511  _S_unary_minus(const _SimdTuple<_Tp, _As...>& __x) noexcept
1512  {
1513  return __x._M_apply_per_chunk(
1514  [](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1515  return __impl._S_unary_minus(__xx);
1516  });
1517  }
1518 
1519  // arithmetic operators {{{2
1520 
1521 #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \
1522  template <typename _Tp, typename... _As> \
1523  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...> name_( \
1524  const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y) \
1525  { \
1526  return __x._M_apply_per_chunk( \
1527  [](auto __impl, auto __xx, auto __yy) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
1528  return __impl.name_(__xx, __yy); \
1529  }, \
1530  __y); \
1531  }
1532 
1533  _GLIBCXX_SIMD_FIXED_OP(_S_plus, +)
1534  _GLIBCXX_SIMD_FIXED_OP(_S_minus, -)
1535  _GLIBCXX_SIMD_FIXED_OP(_S_multiplies, *)
1536  _GLIBCXX_SIMD_FIXED_OP(_S_divides, /)
1537  _GLIBCXX_SIMD_FIXED_OP(_S_modulus, %)
1538  _GLIBCXX_SIMD_FIXED_OP(_S_bit_and, &)
1539  _GLIBCXX_SIMD_FIXED_OP(_S_bit_or, |)
1540  _GLIBCXX_SIMD_FIXED_OP(_S_bit_xor, ^)
1541  _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_left, <<)
1542  _GLIBCXX_SIMD_FIXED_OP(_S_bit_shift_right, >>)
1543 #undef _GLIBCXX_SIMD_FIXED_OP
1544 
1545  template <typename _Tp, typename... _As>
1546  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1547  _S_bit_shift_left(const _SimdTuple<_Tp, _As...>& __x, int __y)
1548  {
1549  return __x._M_apply_per_chunk(
1550  [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1551  return __impl._S_bit_shift_left(__xx, __y);
1552  });
1553  }
1554 
1555  template <typename _Tp, typename... _As>
1556  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdTuple<_Tp, _As...>
1557  _S_bit_shift_right(const _SimdTuple<_Tp, _As...>& __x, int __y)
1558  {
1559  return __x._M_apply_per_chunk(
1560  [__y](auto __impl, auto __xx) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1561  return __impl._S_bit_shift_right(__xx, __y);
1562  });
1563  }
1564 
1565  // math {{{2
1566 #define _GLIBCXX_SIMD_APPLY_ON_TUPLE(_RetTp, __name) \
1567  template <typename _Tp, typename... _As, typename... _More> \
1568  static inline __fixed_size_storage_t<_RetTp, _Np> \
1569  _S_##__name(const _SimdTuple<_Tp, _As...>& __x, \
1570  const _More&... __more) \
1571  { \
1572  if constexpr (sizeof...(_More) == 0) \
1573  { \
1574  if constexpr (is_same_v<_Tp, _RetTp>) \
1575  return __x._M_apply_per_chunk( \
1576  [](auto __impl, auto __xx) \
1577  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1578  { \
1579  using _V = typename decltype(__impl)::simd_type; \
1580  return __data(__name(_V(__private_init, __xx))); \
1581  }); \
1582  else \
1583  return __optimize_simd_tuple( \
1584  __x.template _M_apply_r<_RetTp>( \
1585  [](auto __impl, auto __xx) \
1586  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1587  { return __impl._S_##__name(__xx); })); \
1588  } \
1589  else if constexpr ( \
1590  is_same_v< \
1591  _Tp, \
1592  _RetTp> && (... && is_same_v<_SimdTuple<_Tp, _As...>, _More>) ) \
1593  return __x._M_apply_per_chunk( \
1594  [](auto __impl, auto __xx, auto... __pack) \
1595  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1596  { \
1597  using _V = typename decltype(__impl)::simd_type; \
1598  return __data(__name(_V(__private_init, __xx), \
1599  _V(__private_init, __pack)...)); \
1600  }, __more...); \
1601  else if constexpr (is_same_v<_Tp, _RetTp>) \
1602  return __x._M_apply_per_chunk( \
1603  [](auto __impl, auto __xx, auto... __pack) \
1604  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1605  { \
1606  using _V = typename decltype(__impl)::simd_type; \
1607  return __data(__name(_V(__private_init, __xx), \
1608  __autocvt_to_simd(__pack)...)); \
1609  }, __more...); \
1610  else \
1611  __assert_unreachable<_Tp>(); \
1612  }
1613 
1614  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acos)
1615  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asin)
1616  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan)
1617  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atan2)
1618  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cos)
1619  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sin)
1620  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tan)
1621  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, acosh)
1622  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, asinh)
1623  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, atanh)
1624  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cosh)
1625  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sinh)
1626  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tanh)
1627  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp)
1628  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, exp2)
1629  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, expm1)
1630  _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, ilogb)
1631  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log)
1632  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log10)
1633  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log1p)
1634  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, log2)
1635  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, logb)
1636  // modf implemented in simd_math.h
 1637  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbn) // double scalbn(double x, int exp);
1639  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, scalbln)
1640  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, cbrt)
1641  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, abs)
1642  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fabs)
1643  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, pow)
1644  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, sqrt)
1645  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erf)
1646  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, erfc)
1647  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, lgamma)
1648  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, tgamma)
1649  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, trunc)
1650  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ceil)
1651  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, floor)
1652  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nearbyint)
1653 
1654  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, rint)
1655  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lrint)
1656  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llrint)
1657 
1658  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, round)
1659  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long, lround)
1660  _GLIBCXX_SIMD_APPLY_ON_TUPLE(long long, llround)
1661 
1662  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, ldexp)
1663  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmod)
1664  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, remainder)
 1665  // copysign implemented in simd_math.h
1666  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, nextafter)
1667  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fdim)
1668  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmax)
1669  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fmin)
1670  _GLIBCXX_SIMD_APPLY_ON_TUPLE(_Tp, fma)
1671  _GLIBCXX_SIMD_APPLY_ON_TUPLE(int, fpclassify)
1672 #undef _GLIBCXX_SIMD_APPLY_ON_TUPLE
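  // Call-path sketch (illustrative, element count chosen arbitrarily): a
  // user-level call such as
  //
  //   using _V7 = simd<float, simd_abi::fixed_size<7>>;
  //   _V7 __v = /* ... */;
  //   _V7 __s = sin(__v);
  //
  // reaches the generated _S_sin above via the wrappers in simd_math.h, which
  // then evaluates sin() once per native chunk of the underlying _SimdTuple
  // and returns the reassembled fixed_size result.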
1673 
1674  template <typename _Tp, typename... _Abis>
1675  static inline _SimdTuple<_Tp, _Abis...>
1676  _S_remquo(const _SimdTuple<_Tp, _Abis...>& __x, const _SimdTuple<_Tp, _Abis...>& __y,
1677  __fixed_size_storage_t<int, _SimdTuple<_Tp, _Abis...>::_S_size()>* __z)
1678  {
1679  return __x._M_apply_per_chunk(
1680  [](auto __impl, const auto __xx, const auto __yy, auto& __zz)
1681  _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1682  { return __impl._S_remquo(__xx, __yy, &__zz); },
1683  __y, *__z);
1684  }
1685 
1686  template <typename _Tp, typename... _As>
1687  static inline _SimdTuple<_Tp, _As...>
1688  _S_frexp(const _SimdTuple<_Tp, _As...>& __x,
1689  __fixed_size_storage_t<int, _Np>& __exp) noexcept
1690  {
1691  return __x._M_apply_per_chunk(
1692  [](auto __impl, const auto& __a, auto& __b) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1693  return __data(frexp(typename decltype(__impl)::simd_type(__private_init, __a),
1694  __autocvt_to_simd(__b)));
1695  }, __exp);
1696  }
1697 
1698 #define _GLIBCXX_SIMD_TEST_ON_TUPLE_(name_) \
1699  template <typename _Tp, typename... _As> \
1700  static inline _MaskMember \
1701  _S_##name_(const _SimdTuple<_Tp, _As...>& __x) noexcept \
1702  { \
1703  return _M_test([] (auto __impl, auto __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { \
1704  return __impl._S_##name_(__xx); \
1705  }, __x); \
1706  }
1707 
1708  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isinf)
1709  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isfinite)
1710  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnan)
1711  _GLIBCXX_SIMD_TEST_ON_TUPLE_(isnormal)
1712  _GLIBCXX_SIMD_TEST_ON_TUPLE_(signbit)
1713 #undef _GLIBCXX_SIMD_TEST_ON_TUPLE_
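  // E.g. _S_isnan collects one bit per element into the returned mask. For an
  // illustrative simd<double, simd_abi::fixed_size<3>> holding {1.0, NAN, 2.0}
  // the result has only bit 1 set (0b010).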
1714 
1715  // _S_increment & _S_decrement{{{2
1716  template <typename... _Ts>
1717  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1718  _S_increment(_SimdTuple<_Ts...>& __x)
1719  {
1720  __for_each(
 1721  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 1722  __meta._S_increment(__native);
1723  });
1724  }
1725 
1726  template <typename... _Ts>
1727  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1728  _S_decrement(_SimdTuple<_Ts...>& __x)
1729  {
1730  __for_each(
 1731  __x, [](auto __meta, auto& __native) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 1732  __meta._S_decrement(__native);
1733  });
1734  }
1735 
1736  // compares {{{2
1737 #define _GLIBCXX_SIMD_CMP_OPERATIONS(__cmp) \
1738  template <typename _Tp, typename... _As> \
1739  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember \
1740  __cmp(const _SimdTuple<_Tp, _As...>& __x, \
1741  const _SimdTuple<_Tp, _As...>& __y) \
1742  { \
1743  return _M_test([](auto __impl, auto __xx, auto __yy) \
1744  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
1745  { return __impl.__cmp(__xx, __yy); }, \
1746  __x, __y); \
1747  }
1748 
1749  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_equal_to)
1750  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_not_equal_to)
1751  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less)
1752  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_less_equal)
1753  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isless)
1754  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessequal)
1755  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreater)
1756  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isgreaterequal)
1757  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_islessgreater)
1758  _GLIBCXX_SIMD_CMP_OPERATIONS(_S_isunordered)
1759 #undef _GLIBCXX_SIMD_CMP_OPERATIONS
1760 
1761  // smart_reference access {{{2
1762  template <typename _Tp, typename... _As, typename _Up>
1763  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1764  _S_set(_SimdTuple<_Tp, _As...>& __v, int __i, _Up&& __x) noexcept
1765  { __v._M_set(__i, static_cast<_Up&&>(__x)); }
1766 
1767  // _S_masked_assign {{{2
1768  template <typename _Tp, typename... _As>
1769  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1770  _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1771  const __type_identity_t<_SimdTuple<_Tp, _As...>>& __rhs)
1772  {
1773  __for_each(__lhs, __rhs,
1774  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
1775  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1776  {
1777  __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
1778  __native_rhs);
1779  });
1780  }
1781 
1782  // Optimization for the case where the RHS is a scalar. No need to broadcast
1783  // the scalar to a simd first.
1784  template <typename _Tp, typename... _As>
1785  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1786  _S_masked_assign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1787  const __type_identity_t<_Tp> __rhs)
1788  {
1789  __for_each(
1790  __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1791  __meta._S_masked_assign(__meta._S_make_mask(__bits), __native_lhs,
1792  __rhs);
1793  });
1794  }
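  // Usage sketch: for fixed_size simd objects a masked assignment with a
  // scalar right-hand side, e.g.
  //
  //   where(__k, __v) = _Tp(1);
  //
  // ends up in this overload; the scalar is handed to every chunk directly
  // instead of first being broadcast into a temporary _SimdTuple.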
1795 
1796  // _S_masked_cassign {{{2
1797  template <typename _Op, typename _Tp, typename... _As>
1798  static constexpr inline void
1799  _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1800  const _SimdTuple<_Tp, _As...>& __rhs, _Op __op)
1801  {
1802  __for_each(__lhs, __rhs,
1803  [&](auto __meta, auto& __native_lhs, auto __native_rhs)
1804  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
1805  {
1806  __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
1807  __native_lhs, __native_rhs, __op);
1808  });
1809  }
1810 
1811  // Optimization for the case where the RHS is a scalar. No need to broadcast
1812  // the scalar to a simd first.
1813  template <typename _Op, typename _Tp, typename... _As>
1814  static constexpr inline void
1815  _S_masked_cassign(const _MaskMember __bits, _SimdTuple<_Tp, _As...>& __lhs,
1816  const _Tp& __rhs, _Op __op)
1817  {
1818  __for_each(
1819  __lhs, [&](auto __meta, auto& __native_lhs) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1820  __meta.template _S_masked_cassign(__meta._S_make_mask(__bits),
1821  __native_lhs, __rhs, __op);
1822  });
1823  }
1824 
1825  // _S_masked_unary {{{2
1826  template <template <typename> class _Op, typename _Tp, typename... _As>
1827  static constexpr inline _SimdTuple<_Tp, _As...>
1828  _S_masked_unary(const _MaskMember __bits,
1829  const _SimdTuple<_Tp, _As...> __v) // TODO: const-ref __v?
1830  {
1831  return __v._M_apply_wrapped([&__bits](auto __meta,
1832  auto __native) constexpr {
1833  return __meta.template _S_masked_unary<_Op>(__meta._S_make_mask(
1834  __bits),
1835  __native);
1836  });
1837  }
1838 
1839  // }}}2
1840  };
1841 
1842 // _MaskImplFixedSize {{{1
1843 template <int _Np>
1844  struct _MaskImplFixedSize
1845  {
1846  static_assert(
1847  sizeof(_ULLong) * __CHAR_BIT__ >= _Np,
1848  "The fixed_size implementation relies on one _ULLong being able to store "
1849  "all boolean elements."); // required in load & store
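  // With the usual 64-bit _ULLong this limits the fixed_size masks handled
  // here to _Np <= 64.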
1850 
1851  // member types {{{
1852  using _Abi = simd_abi::fixed_size<_Np>;
1853 
1854  using _MaskMember = _SanitizedBitMask<_Np>;
1855 
1856  template <typename _Tp>
1857  using _FirstAbi = typename __fixed_size_storage_t<_Tp, _Np>::_FirstAbi;
1858 
1859  template <typename _Tp>
1860  using _TypeTag = _Tp*;
1861 
1862  // }}}
1863  // _S_broadcast {{{
1864  template <typename>
1865  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1866  _S_broadcast(bool __x)
1867  { return __x ? ~_MaskMember() : _MaskMember(); }
1868 
1869  // }}}
1870  // _S_load {{{
1871  template <typename>
1872  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1873  _S_load(const bool* __mem)
1874  {
1875  if (__builtin_is_constant_evaluated())
1876  {
1877  _MaskMember __r{};
1878  for (size_t __i = 0; __i < _Np; ++__i)
1879  __r.set(__i, __mem[__i]);
1880  return __r;
1881  }
1882  using _Ip = __int_for_sizeof_t<bool>;
1883  // the following load uses element_aligned and relies on __mem already
1884  // carrying alignment information from when this load function was
1885  // called.
1886  const simd<_Ip, _Abi> __bools(reinterpret_cast<const __may_alias<_Ip>*>(
1887  __mem),
1888  element_aligned);
1889  return __data(__bools != 0);
1890  }
1891 
1892  // }}}
1893  // _S_to_bits {{{
1894  template <bool _Sanitized>
1895  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
1896  _S_to_bits(_BitMask<_Np, _Sanitized> __x)
1897  {
1898  if constexpr (_Sanitized)
1899  return __x;
1900  else
1901  return __x._M_sanitized();
1902  }
1903 
1904  // }}}
1905  // _S_convert {{{
1906  template <typename _Tp, typename _Up, typename _UAbi>
1907  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1908  _S_convert(simd_mask<_Up, _UAbi> __x)
1909  {
1910  return _UAbi::_MaskImpl::_S_to_bits(__data(__x))
1911  .template _M_extract<0, _Np>();
1912  }
1913 
1914  // }}}
1915  // _S_from_bitmask {{{2
1916  template <typename _Tp>
1917  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1918  _S_from_bitmask(_MaskMember __bits, _TypeTag<_Tp>) noexcept
1919  { return __bits; }
1920 
1921  // _S_load {{{2
1922  static constexpr inline _MaskMember
1923  _S_load(const bool* __mem) noexcept
1924  {
1925  // TODO: _UChar is not necessarily the best type to use here. For smaller
 1926  // _Np, _UShort, _UInt, _ULLong, float, and double can be more efficient.
1927  _ULLong __r = 0;
1928  using _Vs = __fixed_size_storage_t<_UChar, _Np>;
1929  __for_each(_Vs{}, [&](auto __meta, auto) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1930  __r |= __meta._S_mask_to_shifted_ullong(
1931  __meta._S_mask_impl._S_load(&__mem[__meta._S_offset],
1932  _SizeConstant<__meta._S_size()>()));
1933  });
1934  return __r;
1935  }
1936 
1937  // _S_masked_load {{{2
1938  static constexpr inline _MaskMember
1939  _S_masked_load(_MaskMember __merge, _MaskMember __mask, const bool* __mem) noexcept
1940  {
1941  _BitOps::_S_bit_iteration(__mask.to_ullong(),
1942  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
1943  __merge.set(__i, __mem[__i]);
1944  });
1945  return __merge;
1946  }
1947 
1948  // _S_store {{{2
1949  static constexpr inline void
1950  _S_store(const _MaskMember __bitmask, bool* __mem) noexcept
1951  {
1952  if constexpr (_Np == 1)
1953  __mem[0] = __bitmask[0];
1954  else
1955  _FirstAbi<_UChar>::_CommonImpl::_S_store_bool_array(__bitmask, __mem);
1956  }
1957 
1958  // _S_masked_store {{{2
1959  static constexpr inline void
1960  _S_masked_store(const _MaskMember __v, bool* __mem, const _MaskMember __k) noexcept
1961  {
1962  _BitOps::_S_bit_iteration(
1963  __k, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { __mem[__i] = __v[__i]; });
1964  }
1965 
1966  // logical and bitwise operators {{{2
1967  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1968  _S_logical_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
1969  { return __x & __y; }
1970 
1971  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1972  _S_logical_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
1973  { return __x | __y; }
1974 
1975  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1976  _S_bit_not(const _MaskMember& __x) noexcept
1977  { return ~__x; }
1978 
1979  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1980  _S_bit_and(const _MaskMember& __x, const _MaskMember& __y) noexcept
1981  { return __x & __y; }
1982 
1983  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1984  _S_bit_or(const _MaskMember& __x, const _MaskMember& __y) noexcept
1985  { return __x | __y; }
1986 
1987  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember
1988  _S_bit_xor(const _MaskMember& __x, const _MaskMember& __y) noexcept
1989  { return __x ^ __y; }
1990 
1991  // smart_reference access {{{2
1992  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1993  _S_set(_MaskMember& __k, int __i, bool __x) noexcept
1994  { __k.set(__i, __x); }
1995 
1996  // _S_masked_assign {{{2
1997  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1998  _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const _MaskMember __rhs)
1999  { __lhs = (__lhs & ~__k) | (__rhs & __k); }
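  // Worked example (4-bit illustration): with __k = 0b0110, __lhs = 0b1010 and
  // __rhs = 0b0101 the blend computes (0b1010 & 0b1001) | (0b0101 & 0b0110)
  // == 0b1000 | 0b0100 == 0b1100, i.e. elements 1 and 2 are taken from __rhs
  // while elements 0 and 3 keep their __lhs values.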
2000 
2001  // Optimization for the case where the RHS is a scalar.
2002  _GLIBCXX_SIMD_INTRINSIC static constexpr void
2003  _S_masked_assign(const _MaskMember __k, _MaskMember& __lhs, const bool __rhs)
2004  {
2005  if (__rhs)
2006  __lhs |= __k;
2007  else
2008  __lhs &= ~__k;
2009  }
2010 
2011  // }}}2
2012  // _S_all_of {{{
2013  template <typename _Tp>
2014  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
2015  _S_all_of(simd_mask<_Tp, _Abi> __k)
2016  { return __data(__k).all(); }
2017 
2018  // }}}
2019  // _S_any_of {{{
2020  template <typename _Tp>
2021  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
2022  _S_any_of(simd_mask<_Tp, _Abi> __k)
2023  { return __data(__k).any(); }
2024 
2025  // }}}
2026  // _S_none_of {{{
2027  template <typename _Tp>
2028  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
2029  _S_none_of(simd_mask<_Tp, _Abi> __k)
2030  { return __data(__k).none(); }
2031 
2032  // }}}
2033  // _S_some_of {{{
2034  template <typename _Tp>
2035  _GLIBCXX_SIMD_INTRINSIC static constexpr bool
2036  _S_some_of([[maybe_unused]] simd_mask<_Tp, _Abi> __k)
2037  {
2038  if constexpr (_Np == 1)
2039  return false;
2040  else
2041  return __data(__k).any() && !__data(__k).all();
2042  }
2043 
2044  // }}}
2045  // _S_popcount {{{
2046  template <typename _Tp>
2047  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2048  _S_popcount(simd_mask<_Tp, _Abi> __k)
2049  { return __data(__k).count(); }
2050 
2051  // }}}
2052  // _S_find_first_set {{{
2053  template <typename _Tp>
2054  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2055  _S_find_first_set(simd_mask<_Tp, _Abi> __k)
2056  { return std::__countr_zero(__data(__k).to_ullong()); }
2057 
2058  // }}}
2059  // _S_find_last_set {{{
2060  template <typename _Tp>
2061  _GLIBCXX_SIMD_INTRINSIC static constexpr int
2062  _S_find_last_set(simd_mask<_Tp, _Abi> __k)
2063  { return std::__bit_width(__data(__k).to_ullong()) - 1; }
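  // Example: for a mask whose bit pattern is 0b0100'1010, _S_find_first_set
  // returns __countr_zero(0b0100'1010) == 1 and _S_find_last_set returns
  // __bit_width(0b0100'1010) - 1 == 6.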
2064 
2065  // }}}
2066  };
2067 // }}}1
2068 
2069 _GLIBCXX_SIMD_END_NAMESPACE
2070 #endif // __cplusplus >= 201703L
2071 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_FIXED_SIZE_H_
2072 
2073 // vim: foldmethod=marker sw=2 noet ts=8 sts=2 tw=80