File Manager

003 File Manager

Current Path: /usr/src/contrib/llvm-project/clang/lib/Headers

usr / src / contrib / llvm-project / clang / lib / Headers /

📁 ..
📄 __clang_cuda_builtin_vars.h(4.56 KB)
📄 __clang_cuda_cmath.h(16.37 KB)
📄 __clang_cuda_complex_builtins.h(8.5 KB)
📄 __clang_cuda_device_functions.h(55.59 KB)
📄 __clang_cuda_intrinsics.h(21.59 KB)
📄 __clang_cuda_libdevice_declares.h(21.73 KB)
📄 __clang_cuda_math.h(15.91 KB)
📄 __clang_cuda_math_forward_declares.h(8.21 KB)
📄 __clang_cuda_runtime_wrapper.h(14.85 KB)
📄 __clang_hip_libdevice_declares.h(19.19 KB)
📄 __clang_hip_math.h(35.41 KB)
📄 __clang_hip_runtime_wrapper.h(1.79 KB)
📄 __stddef_max_align_t.h(857 B)
📄 __wmmintrin_aes.h(5.14 KB)
📄 __wmmintrin_pclmul.h(2.01 KB)
📄 adxintrin.h(2.32 KB)
📄 altivec.h(609.12 KB)
📄 ammintrin.h(7.42 KB)
📄 amxintrin.h(8.24 KB)
📄 arm64intr.h(993 B)
📄 arm_acle.h(21.87 KB)
📄 arm_cmse.h(6.21 KB)
📄 armintr.h(843 B)
📄 avx2intrin.h(38.11 KB)
📄 avx512bf16intrin.h(10.45 KB)
📄 avx512bitalgintrin.h(2.41 KB)
📄 avx512bwintrin.h(75.17 KB)
📄 avx512cdintrin.h(4.12 KB)
📄 avx512dqintrin.h(58.3 KB)
📄 avx512erintrin.h(11.7 KB)
📄 avx512fintrin.h(377.18 KB)
📄 avx512ifmaintrin.h(2.49 KB)
📄 avx512ifmavlintrin.h(4.44 KB)
📄 avx512pfintrin.h(4.53 KB)
📄 avx512vbmi2intrin.h(13.1 KB)
📄 avx512vbmiintrin.h(3.72 KB)
📄 avx512vbmivlintrin.h(6.94 KB)
📄 avx512vlbf16intrin.h(17.88 KB)
📄 avx512vlbitalgintrin.h(4.23 KB)
📄 avx512vlbwintrin.h(110.39 KB)
📄 avx512vlcdintrin.h(7.66 KB)
📄 avx512vldqintrin.h(46.24 KB)
📄 avx512vlintrin.h(322.04 KB)
📄 avx512vlvbmi2intrin.h(25.59 KB)
📄 avx512vlvnniintrin.h(7.87 KB)
📄 avx512vlvp2intersectintrin.h(4.44 KB)
📄 avx512vnniintrin.h(4.21 KB)
📄 avx512vp2intersectintrin.h(2.9 KB)
📄 avx512vpopcntdqintrin.h(2 KB)
📄 avx512vpopcntdqvlintrin.h(3.31 KB)
📄 avxintrin.h(193.62 KB)
📄 bmi2intrin.h(2.25 KB)
📄 bmiintrin.h(14.02 KB)
📄 cet.h(1.49 KB)
📄 cetintrin.h(2.78 KB)
📄 cldemoteintrin.h(1.18 KB)
📄 clflushoptintrin.h(861 B)
📄 clwbintrin.h(1.2 KB)
📄 clzerointrin.h(1.13 KB)
📄 cpuid.h(10.42 KB)
📁 cuda_wrappers
📄 emmintrin.h(180.78 KB)
📄 enqcmdintrin.h(2.12 KB)
📄 f16cintrin.h(5.37 KB)
📄 float.h(4.55 KB)
📄 fma4intrin.h(6.82 KB)
📄 fmaintrin.h(6.83 KB)
📄 fxsrintrin.h(2.82 KB)
📄 gfniintrin.h(7.66 KB)
📄 htmintrin.h(6.14 KB)
📄 htmxlintrin.h(9.01 KB)
📄 ia32intrin.h(12.65 KB)
📄 immintrin.h(18.38 KB)
📄 intrin.h(26.32 KB)
📄 inttypes.h(2.26 KB)
📄 invpcidintrin.h(764 B)
📄 iso646.h(656 B)
📄 limits.h(2.76 KB)
📄 lwpintrin.h(5 KB)
📄 lzcntintrin.h(3.18 KB)
📄 mm3dnow.h(4.5 KB)
📄 mm_malloc.h(1.7 KB)
📄 mmintrin.h(55.85 KB)
📄 module.modulemap(3.33 KB)
📄 movdirintrin.h(1.57 KB)
📄 msa.h(25.01 KB)
📄 mwaitxintrin.h(1.07 KB)
📄 nmmintrin.h(580 B)
📄 opencl-c-base.h(20.83 KB)
📄 opencl-c.h(835.47 KB)
📁 openmp_wrappers
📄 pconfigintrin.h(1.19 KB)
📄 pkuintrin.h(934 B)
📄 pmmintrin.h(10.09 KB)
📄 popcntintrin.h(1.58 KB)
📁 ppc_wrappers
📄 prfchwintrin.h(1.93 KB)
📄 ptwriteintrin.h(1.05 KB)
📄 rdseedintrin.h(1.16 KB)
📄 rtmintrin.h(1.24 KB)
📄 s390intrin.h(604 B)
📄 serializeintrin.h(881 B)
📄 sgxintrin.h(1.77 KB)
📄 shaintrin.h(1.87 KB)
📄 smmintrin.h(98.76 KB)
📄 stdalign.h(583 B)
📄 stdarg.h(1.09 KB)
📄 stdatomic.h(7.13 KB)
📄 stdbool.h(897 B)
📄 stddef.h(3.5 KB)
📄 stdint.h(22.22 KB)
📄 stdnoreturn.h(510 B)
📄 tbmintrin.h(3.15 KB)
📄 tgmath.h(29.68 KB)
📄 tmmintrin.h(29.37 KB)
📄 tsxldtrkintrin.h(1.97 KB)
📄 unwind.h(10.9 KB)
📄 vadefs.h(1.39 KB)
📄 vaesintrin.h(2.39 KB)
📄 varargs.h(477 B)
📄 vecintrin.h(356.66 KB)
📄 vpclmulqdqintrin.h(1 KB)
📄 waitpkgintrin.h(1.33 KB)
📄 wasm_simd128.h(45.99 KB)
📄 wbnoinvdintrin.h(749 B)
📄 wmmintrin.h(530 B)
📄 x86intrin.h(1.67 KB)
📄 xmmintrin.h(106.24 KB)
📄 xopintrin.h(19.91 KB)
📄 xsavecintrin.h(1010 B)
📄 xsaveintrin.h(1.64 KB)
📄 xsaveoptintrin.h(1 KB)
📄 xsavesintrin.h(1.24 KB)
📄 xtestintrin.h(873 B)

Editing: avx512bf16intrin.h

/*===------------ avx512bf16intrin.h - AVX512_BF16 intrinsics --------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512BF16INTRIN_H
#define __AVX512BF16INTRIN_H

/* 64-byte (512-bit) vector of 16-bit lanes, 64-byte aligned. The intrinsics
 * in this header use it to carry [32 x bfloat]. */
typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
/* 32-byte (256-bit) vector of 16-bit lanes, 32-byte aligned. The intrinsics
 * in this header use it to carry [16 x bfloat]. */
typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
/* Scalar carrier for a single bfloat value; to the type system it is just an
 * unsigned short. */
typedef unsigned short __bfloat16;

/* Attribute set applied to the 512-bit intrinsics below: always inlined, no
 * debug info, built for the "avx512bf16" target feature, and declaring a
 * minimum vector width of 512 bits. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
                 __min_vector_width__(512)))
/* Same attribute set without the minimum-vector-width declaration; used by
 * the scalar intrinsic _mm_cvtsbh_ss. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))

/// Widen a single BF16 value to single-precision float.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic does not correspond to a specific instruction.
///
/// \param __A
///    A bfloat value.
/// \returns A float whose sign and exponent fields are taken unchanged from
///    __A and whose fraction field is extended to 23 bits.
static __inline__ float __DEFAULT_FN_ATTRS
_mm_cvtsbh_ss(__bfloat16 __A) {
  float __widened = __builtin_ia32_cvtsbf162ss_32(__A);
  return __widened;
}

/// Convert Two Packed Single Data to One Packed BF16 Data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
///    A 512-bit vector of [16 x float].
/// \param __B
///    A 512-bit vector of [16 x float].
/// \param __W
///    A 512-bit vector of [32 x bfloat].
/// \param __U
///    A 32-bit mask value specifying what is chosen for each element.
///    A 1 means conversion of __A or __B. A 0 means element from __W.
/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from
///    conversion of __B, and higher 256 bits come from conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) {
  return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U,
                                        (__v32hi)_mm512_cvtne2ps_pbh(__A, __B),
                                        (__v32hi)__W);
}

/// Convert two packed single-precision vectors to one packed BF16 vector,
/// zeroing the elements whose mask bit is clear.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNE2PS2BF16 </c> instructions.
///
/// \param __A
///    A 512-bit vector of [16 x float].
/// \param __B
///    A 512-bit vector of [16 x float].
/// \param __U
///    A 32-bit mask value specifying what is chosen for each element.
///    A 1 means conversion of __A or __B. A 0 means the element is zero.
/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from
///    the conversion of __B and whose higher 256 bits come from the
///    conversion of __A.
static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) {
  __v32hi __converted = (__v32hi)_mm512_cvtne2ps_pbh(__A, __B);
  __v32hi __zeros = (__v32hi)_mm512_setzero_si512();
  return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U, __converted,
                                              __zeros);
}

/// Convert packed single-precision data to packed BF16 data.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
///    A 512-bit vector of [16 x float].
/// \returns A 256-bit vector of [16 x bfloat] holding the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_cvtneps_pbh(__m512 __A) {
  /* All sixteen mask bits are set, so the pass-through operand may be left
   * undefined. */
  const __mmask16 __all_lanes = (__mmask16)-1;
  __v16hi __passthru = (__v16hi)_mm256_undefined_si256();
  return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A,
                                                        __passthru,
                                                        __all_lanes);
}

/// Convert packed single-precision data to packed BF16 data, taking the
/// elements whose mask bit is clear from __W.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
///    A 512-bit vector of [16 x float].
/// \param __W
///    A 256-bit vector of [16 x bfloat].
/// \param __U
///    A 16-bit mask value specifying what is chosen for each element.
///    A 1 means conversion of __A. A 0 means element from __W.
/// \returns A 256-bit vector of [16 x bfloat] holding the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) {
  __v16sf __src = (__v16sf)__A;
  __v16hi __passthru = (__v16hi)__W;
  return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask(__src, __passthru,
                                                        (__mmask16)__U);
}

/// Convert packed single-precision data to packed BF16 data, zeroing the
/// elements whose mask bit is clear.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VCVTNEPS2BF16 </c> instructions.
///
/// \param __A
///    A 512-bit vector of [16 x float].
/// \param __U
///    A 16-bit mask value specifying what is chosen for each element.
///    A 1 means conversion of __A. A 0 means the element is zero.
/// \returns A 256-bit vector of [16 x bfloat] holding the conversion of __A.
static __inline__ __m256bh __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) {
  __v16sf __src = (__v16sf)__A;
  __v16hi __zeros = (__v16hi)_mm256_setzero_si256();
  return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask(__src, __zeros,
                                                        (__mmask16)__U);
}

/// Dot product of BF16 pairs, accumulated into packed single precision.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
///    A 512-bit vector of [32 x bfloat].
/// \param __B
///    A 512-bit vector of [32 x bfloat].
/// \param __D
///    A 512-bit vector of [16 x float].
/// \returns A 512-bit vector of [16 x float] holding the dot product of
///    __A and __B accumulated with __D.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) {
  /* The builtin takes the BF16 operands reinterpreted as 32-bit lanes. */
  __v16sf __acc = (__v16sf)__D;
  __v16si __lhs = (__v16si)__A;
  __v16si __rhs = (__v16si)__B;
  return (__m512)__builtin_ia32_dpbf16ps_512(__acc, __lhs, __rhs);
}

/// Dot product of BF16 pairs, accumulated into packed single precision,
/// keeping __D in the elements whose mask bit is clear.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
///    A 512-bit vector of [32 x bfloat].
/// \param __B
///    A 512-bit vector of [32 x bfloat].
/// \param __D
///    A 512-bit vector of [16 x float].
/// \param __U
///    A 16-bit mask value specifying what is chosen for each element.
///    A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.
/// \returns A 512-bit vector of [16 x float] holding the dot product of
///    __A and __B accumulated with __D.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) {
  __v16sf __dot = (__v16sf)_mm512_dpbf16_ps(__D, __A, __B);
  __v16sf __keep = (__v16sf)__D;
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, __dot, __keep);
}

/// Dot product of BF16 pairs, accumulated into packed single precision,
/// zeroing the elements whose mask bit is clear.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VDPBF16PS </c> instructions.
///
/// \param __A
///    A 512-bit vector of [32 x bfloat].
/// \param __B
///    A 512-bit vector of [32 x bfloat].
/// \param __D
///    A 512-bit vector of [16 x float].
/// \param __U
///    A 16-bit mask value specifying what is chosen for each element.
///    A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
/// \returns A 512-bit vector of [16 x float] holding the dot product of
///    __A and __B accumulated with __D.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
  __v16sf __dot = (__v16sf)_mm512_dpbf16_ps(__D, __A, __B);
  /* An all-zero integer vector reinterpreted as [16 x float]. */
  __v16sf __zeros = (__v16sf)_mm512_setzero_si512();
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, __dot, __zeros);
}

/// Convert packed BF16 data to packed float data.
///
/// \headerfile <x86intrin.h>
///
/// \param __A
///    A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] holding the conversion of __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtpbh_ps(__m256bh __A) {
  /* Widen each 16-bit element to 32 bits, then shift it left by 16 so the
   * BF16 bits end up in the upper half of each 32-bit lane. */
  __m512i __wide = (__m512i)_mm512_cvtepi16_epi32((__m256i)__A);
  __m512i __bits = (__m512i)_mm512_slli_epi32(__wide, 16);
  return _mm512_castsi512_ps(__bits);
}

/// Convert packed BF16 data to packed float data using a zeroing mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __U
///    A 16-bit mask. Elements are zeroed out when the corresponding mask
///    bit is not set.
/// \param __A
///    A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] holding the conversion of __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
  /* The mask is applied at the widening step; the unmasked shift that
   * follows leaves zeroed lanes at zero. */
  __m512i __wide =
      (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A);
  __m512i __bits = (__m512i)_mm512_slli_epi32(__wide, 16);
  return _mm512_castsi512_ps(__bits);
}

/// Convert packed BF16 data to packed float data using a merging mask.
///
/// \headerfile <x86intrin.h>
///
/// \param __S
///    A 512-bit vector of [16 x float]. Elements are copied from __S when
///    the corresponding mask bit is not set.
/// \param __U
///    A 16-bit mask.
/// \param __A
///    A 256-bit vector of [16 x bfloat].
/// \returns A 512-bit vector of [16 x float] holding the conversion of __A.
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
  /* Widening is unmasked; the mask is applied at the shift step, with __S
   * as the source operand of the masked shift. */
  __m512i __wide = (__m512i)_mm512_cvtepi16_epi32((__m256i)__A);
  __m512i __merged = (__m512i)_mm512_mask_slli_epi32(
      (__m512i)__S, (__mmask16)__U, __wide, 16);
  return _mm512_castsi512_ps(__merged);
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512

#endif

003 File Manager

Editing: avx512bf16intrin.h

Upload File

Create Folder