|
Arch | get_arch_gcc_builtin_cpu_supports () |
| Retrieves supported architecture using GCC __builtin_cpu_supports function. More...
|
|
Arch | get_arch_linux_cpuinfo () |
| Retrieves supported architecture from Linux /proc/cpuinfo file. More...
|
|
basic_int8x16 | bit_and (basic_int8x16 a, int128 b) |
| Computes bitwise AND of integer vectors. More...
|
|
basic_int16x8 | bit_and (basic_int16x8 a, int128 b) |
|
basic_int32x4 | bit_and (basic_int32x4 a, int128 b) |
|
basic_int64x2 | bit_and (basic_int64x2 a, int128 b) |
|
basic_int8x32 | bit_and (basic_int8x32 a, int256 b) |
|
basic_int16x16 | bit_and (basic_int16x16 a, int256 b) |
|
basic_int32x8 | bit_and (basic_int32x8 a, int256 b) |
|
basic_int64x4 | bit_and (basic_int64x4 a, int256 b) |
|
basic_int8x16 | bit_and (basic_int8x16 a, mask_int8x16 b) |
|
basic_int16x8 | bit_and (basic_int16x8 a, mask_int16x8 b) |
|
basic_int32x4 | bit_and (basic_int32x4 a, mask_int32x4 b) |
|
basic_int64x2 | bit_and (basic_int64x2 a, mask_int64x2 b) |
|
basic_int8x32 | bit_and (basic_int8x32 a, mask_int8x32 b) |
|
basic_int16x16 | bit_and (basic_int16x16 a, mask_int16x16 b) |
|
basic_int32x8 | bit_and (basic_int32x8 a, mask_int32x8 b) |
|
basic_int64x4 | bit_and (basic_int64x4 a, mask_int64x4 b) |
|
mask_int8x16 | bit_and (mask_int8x16 a, mask_int8x16 b) |
|
mask_int16x8 | bit_and (mask_int16x8 a, mask_int16x8 b) |
|
mask_int32x4 | bit_and (mask_int32x4 a, mask_int32x4 b) |
|
mask_int64x2 | bit_and (mask_int64x2 a, mask_int64x2 b) |
|
mask_int8x32 | bit_and (mask_int8x32 a, mask_int8x32 b) |
|
mask_int16x16 | bit_and (mask_int16x16 a, mask_int16x16 b) |
|
mask_int32x8 | bit_and (mask_int32x8 a, mask_int32x8 b) |
|
mask_int64x4 | bit_and (mask_int64x4 a, mask_int64x4 b) |
|
void | prefetch_read (const void *ptr) |
| Prefetches data to the lowest level cache for reading. More...
|
|
void | prefetch_write (const void *ptr) |
| Prefetches data to the lowest level cache for writing. More...
|
|
template<class R , class T > |
R | bit_cast (T t) |
| Casts between unrelated types. More...
|
|
mask_int8x16 | cmp_eq (basic_int8x16 a, basic_int8x16 b) |
| Compares 8-bit values for equality. More...
|
|
mask_int8x32 | cmp_eq (basic_int8x32 a, basic_int8x32 b) |
|
mask_float64x2 | cmp_gt (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for greater-than. More...
|
|
mask_float64x4 | cmp_gt (float64x4 a, float64x4 b) |
|
mask_float64x2 | cmp_ge (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for greater-than or equal. More...
|
|
mask_float64x4 | cmp_ge (float64x4 a, float64x4 b) |
|
basic_int16x8 | to_int16x8 (int8x16 a) |
| Sign extends the first 8 values of a signed int8x16 vector to 16-bits. More...
|
|
basic_int16x16 | to_int16x16 (int8x32 a) |
| Sign extends the first 16 values of a signed int8x32 vector to 16-bits. More...
|
|
basic_int16x8 | to_int16x8 (uint8x16 a) |
| Extends the first 8 values of an unsigned int8x16 vector to 16-bits. More...
|
|
basic_int16x16 | to_int16x16 (uint8x32 a) |
| Extends the first 16 values of an unsigned int8x32 vector to 16-bits. More...
|
|
basic_int32x4 | to_int32x4 (int16x8 a) |
| Sign extends the first 4 values of a signed int16x8 vector to 32-bits. More...
|
|
basic_int32x8 | to_int32x8 (int16x16 a) |
| Sign extends the first 8 values of a signed int16x16 vector to 32-bits. More...
|
|
basic_int32x4 | to_int32x4 (uint16x8 a) |
| Zero-extends the values of an unsigned int16x8 vector to 32-bits. More...
|
|
basic_int32x8 | to_int32x8 (uint16x16 a) |
| Zero-extends the first 8 values of an unsigned int16x16 vector to 32-bits. More...
|
|
template<unsigned id> |
float | extract (float32x4 a) |
| Extracts an element from float32x4 vector. More...
|
|
template<unsigned id> |
double | extract (float64x2 a) |
| Extracts an element from float64x2 vector. More...
|
|
uint16_t | extract_bits_any (uint8x16 a) |
| Extracts a bit from each byte of each element of an int8x16 vector. More...
|
|
template<unsigned id> |
uint16_t | extract_bits (uint8x16 a) |
| Extracts a specific bit from each byte of each element of an int8x16 vector. More...
|
|
template<unsigned id> |
basic_int8x16 | insert (basic_int8x16 a, uint8_t x) |
| Inserts an element into int8x16 vector at the position identified by id. More...
|
|
template<unsigned id> |
basic_int16x8 | insert (basic_int16x8 a, uint16_t x) |
| Inserts an element into int16x8 vector at the position identified by id. More...
|
|
template<unsigned id> |
basic_int32x4 | insert (basic_int32x4 a, uint32_t x) |
| Inserts an element into int32x4 vector at the position identified by id. More...
|
|
template<unsigned id> |
basic_int64x2 | insert (basic_int64x2 a, uint64_t x) |
| Inserts an element into int64x2 vector at the position identified by id. More...
|
|
template<unsigned id> |
float32x4 | insert (float32x4 a, float x) |
| Inserts an element into float32x4 vector at the position identified by id. More...
|
|
template<unsigned id> |
float64x2 | insert (float64x2 a, double x) |
| Inserts an element into float64x2 vector at the position identified by id. More...
|
|
float32x4 | abs (float32x4 a) |
| Computes absolute value of floating point values. More...
|
|
float32x8 | abs (float32x8 a) |
|
basic_int8x16 | add (basic_int8x16 a, basic_int8x16 b) |
| Adds 8-bit integer values. More...
|
|
basic_int8x32 | add (basic_int8x32 a, basic_int8x32 b) |
|
int8x16 | shift_r (int8x16 a, unsigned count) |
| Shifts signed 8-bit values right by count bits while shifting in the sign bit. More...
|
|
int8x32 | shift_r (int8x32 a, unsigned count) |
|
int128 | load (int128 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an aligned memory location. More...
|
|
int256 | load (int256 &a, const void *p) |
|
float32x4 | load (float32x4 &a, const float *p) |
|
float32x8 | load (float32x8 &a, const float *p) |
|
float64x2 | load (float64x2 &a, const double *p) |
|
float64x4 | load (float64x4 &a, const double *p) |
|
void | load_packed2 (float32x4 &a, float32x4 &b, const float *p) |
| Loads 32-bit float values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (float32x8 &a, float32x8 &b, const float *p) |
|
void | store (void *p, int128 a) |
| Stores a 128-bit or 256-bit integer vector to an aligned memory location. More...
|
|
void | store (void *p, int256 a) |
|
void | store (float *p, float32x4 a) |
|
void | store (float *p, float32x8 a) |
|
void | store (double *p, float64x2 a) |
|
void | store (double *p, float64x4 a) |
|
basic_int8x16 | zip_lo (basic_int8x16 a, basic_int8x16 b) |
| Interleaves the lower halves of two vectors. More...
|
|
basic_int8x32 | zip_lo (basic_int8x32 a, basic_int8x32 b) |
|
basic_int16x8 | zip_lo (basic_int16x8 a, basic_int16x8 b) |
|
basic_int16x16 | zip_lo (basic_int16x16 a, basic_int16x16 b) |
|
basic_int32x4 | zip_lo (basic_int32x4 a, basic_int32x4 b) |
|
basic_int32x8 | zip_lo (basic_int32x8 a, basic_int32x8 b) |
|
basic_int64x2 | zip_lo (basic_int64x2 a, basic_int64x2 b) |
|
basic_int64x4 | zip_lo (basic_int64x4 a, basic_int64x4 b) |
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
basic_int64x4 | permute (basic_int64x4 a) |
| Permutes the values of each set of four consecutive 64-bit values. More...
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
float64x4 | permute (float64x4 a) |
| Permutes the values of each set of four consecutive 64-bit floating-point values. More...
|
|
Arch | this_compile_arch () |
| Returns the instruction set flags that will be required by the currently compiled code. More...
|
|
void | transpose2 (basic_int16x8 &a0, basic_int16x8 &a1) |
| Transposes four 2x2 16-bit matrices within two int16x8 vectors. More...
|
|
void | transpose2 (basic_int16x16 &a0, basic_int16x16 &a1) |
|
void | transpose8 (basic_int8x16 &a0, basic_int8x16 &a1, basic_int8x16 &a2, basic_int8x16 &a3, basic_int8x16 &a4, basic_int8x16 &a5, basic_int8x16 &a6, basic_int8x16 &a7) |
| Transposes two 8x8 8-bit matrices within eight int8x16 vectors. More...
|
|
void | transpose8 (basic_int8x32 &a0, basic_int8x32 &a1, basic_int8x32 &a2, basic_int8x32 &a3, basic_int8x32 &a4, basic_int8x32 &a5, basic_int8x32 &a6, basic_int8x32 &a7) |
|
void | transpose8 (basic_int16x8 &a0, basic_int16x8 &a1, basic_int16x8 &a2, basic_int16x8 &a3, basic_int16x8 &a4, basic_int16x8 &a5, basic_int16x8 &a6, basic_int16x8 &a7) |
| Transposes an 8x8 16-bit matrix within eight int16x8 vectors. More...
|
|
void | transpose8 (basic_int16x16 &a0, basic_int16x16 &a1, basic_int16x16 &a2, basic_int16x16 &a3, basic_int16x16 &a4, basic_int16x16 &a5, basic_int16x16 &a6, basic_int16x16 &a7) |
|
void | transpose16 (basic_int8x16 &a0, basic_int8x16 &a1, basic_int8x16 &a2, basic_int8x16 &a3, basic_int8x16 &a4, basic_int8x16 &a5, basic_int8x16 &a6, basic_int8x16 &a7, basic_int8x16 &a8, basic_int8x16 &a9, basic_int8x16 &a10, basic_int8x16 &a11, basic_int8x16 &a12, basic_int8x16 &a13, basic_int8x16 &a14, basic_int8x16 &a15) |
| Transposes a 16x16 8-bit matrix within sixteen int8x16 vectors. More...
|
|
void | transpose16 (basic_int8x32 &a0, basic_int8x32 &a1, basic_int8x32 &a2, basic_int8x32 &a3, basic_int8x32 &a4, basic_int8x32 &a5, basic_int8x32 &a6, basic_int8x32 &a7, basic_int8x32 &a8, basic_int8x32 &a9, basic_int8x32 &a10, basic_int8x32 &a11, basic_int8x32 &a12, basic_int8x32 &a13, basic_int8x32 &a14, basic_int8x32 &a15) |
|
|
Arch & | operator|= (Arch &x, const Arch &y) |
| Bitwise operators for Arch. More...
|
|
Arch & | operator&= (Arch &x, const Arch &y) |
| Bitwise operators for Arch. More...
|
|
Arch | operator| (const Arch &x, const Arch &y) |
| Bitwise operators for Arch. More...
|
|
Arch | operator& (const Arch &x, const Arch &y) |
| Bitwise operators for Arch. More...
|
|
Arch | operator~ (const Arch &x) |
| Bitwise operators for Arch. More...
|
|
|
float32x4 | bit_and (float32x4 a, float32x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float32x8 | bit_and (float32x8 a, float32x8 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float32x4 | bit_and (float32x4 a, int128 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float32x8 | bit_and (float32x8 a, int256 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float32x4 | bit_and (float32x4 a, mask_float32x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float32x8 | bit_and (float32x8 a, mask_float32x8 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
mask_float32x4 | bit_and (mask_float32x4 a, mask_float32x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
mask_float32x8 | bit_and (mask_float32x8 a, mask_float32x8 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x2 | bit_and (float64x2 a, float64x2 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x4 | bit_and (float64x4 a, float64x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x2 | bit_and (float64x2 a, int128 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x4 | bit_and (float64x4 a, int256 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x2 | bit_and (float64x2 a, mask_float64x2 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
float64x4 | bit_and (float64x4 a, mask_float64x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
mask_float64x2 | bit_and (mask_float64x2 a, mask_float64x2 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
mask_float64x4 | bit_and (mask_float64x4 a, mask_float64x4 b) |
| Computes bitwise AND of floating-point vectors. More...
|
|
|
basic_int8x16 | bit_andnot (basic_int8x16 a, int128 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int16x8 | bit_andnot (basic_int16x8 a, int128 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int32x4 | bit_andnot (basic_int32x4 a, int128 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int64x2 | bit_andnot (basic_int64x2 a, int128 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int8x32 | bit_andnot (basic_int8x32 a, int256 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int16x16 | bit_andnot (basic_int16x16 a, int256 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int32x8 | bit_andnot (basic_int32x8 a, int256 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int64x4 | bit_andnot (basic_int64x4 a, int256 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int8x16 | bit_andnot (basic_int8x16 a, mask_int8x16 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int16x8 | bit_andnot (basic_int16x8 a, mask_int16x8 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int32x4 | bit_andnot (basic_int32x4 a, mask_int32x4 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int64x2 | bit_andnot (basic_int64x2 a, mask_int64x2 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int8x32 | bit_andnot (basic_int8x32 a, mask_int8x32 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int16x16 | bit_andnot (basic_int16x16 a, mask_int16x16 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int32x8 | bit_andnot (basic_int32x8 a, mask_int32x8 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
basic_int64x4 | bit_andnot (basic_int64x4 a, mask_int64x4 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int8x16 | bit_andnot (mask_int8x16 a, mask_int8x16 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int16x8 | bit_andnot (mask_int16x8 a, mask_int16x8 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int32x4 | bit_andnot (mask_int32x4 a, mask_int32x4 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int64x2 | bit_andnot (mask_int64x2 a, mask_int64x2 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int8x32 | bit_andnot (mask_int8x32 a, mask_int8x32 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int16x16 | bit_andnot (mask_int16x16 a, mask_int16x16 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int32x8 | bit_andnot (mask_int32x8 a, mask_int32x8 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
mask_int64x4 | bit_andnot (mask_int64x4 a, mask_int64x4 b) |
| Computes bitwise AND NOT of integer vectors. More...
|
|
|
float32x4 | bit_andnot (float32x4 a, float32x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float32x8 | bit_andnot (float32x8 a, float32x8 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float32x4 | bit_andnot (float32x4 a, int128 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float32x8 | bit_andnot (float32x8 a, int256 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float32x4 | bit_andnot (float32x4 a, mask_float32x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float32x8 | bit_andnot (float32x8 a, mask_float32x8 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
mask_float32x4 | bit_andnot (mask_float32x4 a, mask_float32x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
mask_float32x8 | bit_andnot (mask_float32x8 a, mask_float32x8 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x2 | bit_andnot (float64x2 a, float64x2 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x4 | bit_andnot (float64x4 a, float64x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x2 | bit_andnot (float64x2 a, int128 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x4 | bit_andnot (float64x4 a, int256 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x2 | bit_andnot (float64x2 a, mask_float64x2 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
float64x4 | bit_andnot (float64x4 a, mask_float64x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
mask_float64x2 | bit_andnot (mask_float64x2 a, mask_float64x2 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
mask_float64x4 | bit_andnot (mask_float64x4 a, mask_float64x4 b) |
| Computes bitwise AND NOT of floating-point vectors. More...
|
|
|
basic_int8x16 | bit_or (basic_int8x16 a, int128 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int16x8 | bit_or (basic_int16x8 a, int128 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int32x4 | bit_or (basic_int32x4 a, int128 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int64x2 | bit_or (basic_int64x2 a, int128 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int8x32 | bit_or (basic_int8x32 a, int256 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int16x16 | bit_or (basic_int16x16 a, int256 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int32x8 | bit_or (basic_int32x8 a, int256 b) |
| Computes bitwise OR of integer vectors. More...
|
|
basic_int64x4 | bit_or (basic_int64x4 a, int256 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int8x16 | bit_or (mask_int8x16 a, mask_int8x16 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int16x8 | bit_or (mask_int16x8 a, mask_int16x8 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int32x4 | bit_or (mask_int32x4 a, mask_int32x4 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int64x2 | bit_or (mask_int64x2 a, mask_int64x2 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int8x32 | bit_or (mask_int8x32 a, mask_int8x32 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int16x16 | bit_or (mask_int16x16 a, mask_int16x16 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int32x8 | bit_or (mask_int32x8 a, mask_int32x8 b) |
| Computes bitwise OR of integer vectors. More...
|
|
mask_int64x4 | bit_or (mask_int64x4 a, mask_int64x4 b) |
| Computes bitwise OR of integer vectors. More...
|
|
|
float32x4 | bit_or (float32x4 a, float32x4 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float32x8 | bit_or (float32x8 a, float32x8 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float32x4 | bit_or (float32x4 a, int128 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float32x8 | bit_or (float32x8 a, int256 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float64x2 | bit_or (float64x2 a, float64x2 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float64x4 | bit_or (float64x4 a, float64x4 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float64x2 | bit_or (float64x2 a, int128 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
float64x4 | bit_or (float64x4 a, int256 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
mask_float32x4 | bit_or (mask_float32x4 a, mask_float32x4 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
mask_float64x2 | bit_or (mask_float64x2 a, mask_float64x2 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
mask_float32x8 | bit_or (mask_float32x8 a, mask_float32x8 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
mask_float64x4 | bit_or (mask_float64x4 a, mask_float64x4 b) |
| Computes bitwise OR of floating-point vectors. More...
|
|
|
basic_int8x16 | bit_xor (basic_int8x16 a, int128 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int16x8 | bit_xor (basic_int16x8 a, int128 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int32x4 | bit_xor (basic_int32x4 a, int128 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int64x2 | bit_xor (basic_int64x2 a, int128 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int8x32 | bit_xor (basic_int8x32 a, int256 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int16x16 | bit_xor (basic_int16x16 a, int256 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int32x8 | bit_xor (basic_int32x8 a, int256 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
basic_int64x4 | bit_xor (basic_int64x4 a, int256 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int8x16 | bit_xor (mask_int8x16 a, mask_int8x16 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int16x8 | bit_xor (mask_int16x8 a, mask_int16x8 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int32x4 | bit_xor (mask_int32x4 a, mask_int32x4 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int64x2 | bit_xor (mask_int64x2 a, mask_int64x2 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int8x32 | bit_xor (mask_int8x32 a, mask_int8x32 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int16x16 | bit_xor (mask_int16x16 a, mask_int16x16 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int32x8 | bit_xor (mask_int32x8 a, mask_int32x8 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
mask_int64x4 | bit_xor (mask_int64x4 a, mask_int64x4 b) |
| Computes bitwise XOR of integer vectors. More...
|
|
|
float32x4 | bit_xor (float32x4 a, float32x4 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float32x8 | bit_xor (float32x8 a, float32x8 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float32x4 | bit_xor (float32x4 a, int128 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float32x8 | bit_xor (float32x8 a, int256 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float64x2 | bit_xor (float64x2 a, float64x2 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float64x4 | bit_xor (float64x4 a, float64x4 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float64x2 | bit_xor (float64x2 a, int128 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
float64x4 | bit_xor (float64x4 a, int256 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
mask_float32x4 | bit_xor (mask_float32x4 a, mask_float32x4 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
mask_float64x2 | bit_xor (mask_float64x2 a, mask_float64x2 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
mask_float32x8 | bit_xor (mask_float32x8 a, mask_float32x8 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
mask_float64x4 | bit_xor (mask_float64x4 a, mask_float64x4 b) |
| Computes bitwise XOR of floating-point vectors. More...
|
|
|
basic_int8x16 | bit_not (basic_int8x16 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int16x8 | bit_not (basic_int16x8 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int32x4 | bit_not (basic_int32x4 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int64x2 | bit_not (basic_int64x2 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int8x32 | bit_not (basic_int8x32 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int16x16 | bit_not (basic_int16x16 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int32x8 | bit_not (basic_int32x8 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
basic_int64x4 | bit_not (basic_int64x4 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int8x16 | bit_not (mask_int8x16 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int16x8 | bit_not (mask_int16x8 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int32x4 | bit_not (mask_int32x4 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int64x2 | bit_not (mask_int64x2 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int8x32 | bit_not (mask_int8x32 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int16x16 | bit_not (mask_int16x16 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int32x8 | bit_not (mask_int32x8 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
mask_int64x4 | bit_not (mask_int64x4 a) |
| Computes bitwise NOT of an integer vector. More...
|
|
|
float32x4 | bit_not (float32x4 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
float64x2 | bit_not (float64x2 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
float32x8 | bit_not (float32x8 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
float64x4 | bit_not (float64x4 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
mask_float32x4 | bit_not (mask_float32x4 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
mask_float64x2 | bit_not (mask_float64x2 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
mask_float32x8 | bit_not (mask_float32x8 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
mask_float64x4 | bit_not (mask_float64x4 a) |
| Computes bitwise NOT of a floating-point vector. More...
|
|
|
mask_int16x8 | cmp_eq (basic_int16x8 a, basic_int16x8 b) |
| Compares 16-bit values for equality. More...
|
|
mask_int16x16 | cmp_eq (basic_int16x16 a, basic_int16x16 b) |
| Compares 16-bit values for equality. More...
|
|
|
mask_int32x4 | cmp_eq (basic_int32x4 a, basic_int32x4 b) |
| Compares the values of two int32x4 vectors for equality. More...
|
|
mask_int32x8 | cmp_eq (basic_int32x8 a, basic_int32x8 b) |
| Compares the values of two int32x4 vectors for equality. More...
|
|
|
mask_int64x2 | cmp_eq (basic_int64x2 a, basic_int64x2 b) |
| Compares the values of two int64x2 vectors for equality. More...
|
|
mask_int64x4 | cmp_eq (basic_int64x4 a, basic_int64x4 b) |
| Compares the values of two int64x2 vectors for equality. More...
|
|
|
mask_float32x4 | cmp_eq (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for equality. More...
|
|
mask_float32x8 | cmp_eq (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for equality. More...
|
|
|
mask_float64x2 | cmp_eq (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for equality. More...
|
|
mask_float64x4 | cmp_eq (float64x4 a, float64x4 b) |
| Compares the values of two float64x2 vectors for equality. More...
|
|
|
mask_int8x16 | cmp_neq (basic_int8x16 a, basic_int8x16 b) |
| Compares the values of two int8x16 vectors for inequality. More...
|
|
mask_int8x32 | cmp_neq (basic_int8x32 a, basic_int8x32 b) |
| Compares the values of two int8x16 vectors for inequality. More...
|
|
|
mask_int16x8 | cmp_neq (basic_int16x8 a, basic_int16x8 b) |
| Compares the values of two int16x8 vectors for inequality. More...
|
|
mask_int16x16 | cmp_neq (basic_int16x16 a, basic_int16x16 b) |
| Compares the values of two int16x8 vectors for inequality. More...
|
|
|
mask_int32x4 | cmp_neq (basic_int32x4 a, basic_int32x4 b) |
| Compares the values of two int32x4 vectors for inequality. More...
|
|
mask_int32x8 | cmp_neq (basic_int32x8 a, basic_int32x8 b) |
| Compares the values of two int32x4 vectors for inequality. More...
|
|
|
mask_int64x2 | cmp_neq (basic_int64x2 a, basic_int64x2 b) |
| Compares the values of two int64x2 vectors for inequality. More...
|
|
mask_int64x4 | cmp_neq (basic_int64x4 a, basic_int64x4 b) |
| Compares the values of two int64x2 vectors for inequality. More...
|
|
|
mask_float32x4 | cmp_neq (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for inequality. More...
|
|
mask_float32x8 | cmp_neq (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for inequality. More...
|
|
|
mask_float64x2 | cmp_neq (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for inequality. More...
|
|
mask_float64x4 | cmp_neq (float64x4 a, float64x4 b) |
| Compares the values of two float64x2 vectors for inequality. More...
|
|
|
mask_int8x16 | cmp_gt (int8x16 a, int8x16 b) |
| Compares the values of two signed int8x16 vectors for greater-than. More...
|
|
mask_int8x32 | cmp_gt (int8x32 a, int8x32 b) |
| Compares the values of two signed int8x16 vectors for greater-than. More...
|
|
|
mask_int8x16 | cmp_gt (uint8x16 a, uint8x16 b) |
| Compares the values of two unsigned int8x16 vectors for greater-than. More...
|
|
mask_int8x32 | cmp_gt (uint8x32 a, uint8x32 b) |
| Compares the values of two unsigned int8x16 vectors for greater-than. More...
|
|
|
mask_int16x8 | cmp_gt (int16x8 a, int16x8 b) |
| Compares the values of two signed int16x8 vectors for greater-than. More...
|
|
mask_int16x16 | cmp_gt (int16x16 a, int16x16 b) |
| Compares the values of two signed int16x8 vectors for greater-than. More...
|
|
|
mask_int16x8 | cmp_gt (uint16x8 a, uint16x8 b) |
| Compares the values of two unsigned int16x8 vectors for greater-than. More...
|
|
mask_int16x16 | cmp_gt (uint16x16 a, uint16x16 b) |
| Compares the values of two unsigned int16x8 vectors for greater-than. More...
|
|
|
mask_int32x4 | cmp_gt (int32x4 a, int32x4 b) |
| Compares the values of two signed int32x4 vectors for greater-than. More...
|
|
mask_int32x8 | cmp_gt (int32x8 a, int32x8 b) |
| Compares the values of two signed int32x4 vectors for greater-than. More...
|
|
|
mask_int32x4 | cmp_gt (uint32x4 a, uint32x4 b) |
| Compares the values of two unsigned int32x4 vectors for greater-than. More...
|
|
mask_int32x8 | cmp_gt (uint32x8 a, uint32x8 b) |
| Compares the values of two unsigned int32x4 vectors for greater-than. More...
|
|
|
mask_float32x4 | cmp_gt (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for greater-than. More...
|
|
mask_float32x8 | cmp_gt (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for greater-than. More...
|
|
|
mask_float32x4 | cmp_ge (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for greater-than or equal. More...
|
|
mask_float32x8 | cmp_ge (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for greater-than or equal. More...
|
|
|
mask_int8x16 | cmp_lt (int8x16 a, int8x16 b) |
| Compares the values of two signed int8x16 vectors for less-than. More...
|
|
mask_int8x32 | cmp_lt (int8x32 a, int8x32 b) |
| Compares the values of two signed int8x16 vectors for less-than. More...
|
|
|
mask_int8x16 | cmp_lt (uint8x16 a, uint8x16 b) |
| Compares the values of two unsigned int8x16 vectors for less-than. More...
|
|
mask_int8x32 | cmp_lt (uint8x32 a, uint8x32 b) |
| Compares the values of two unsigned int8x16 vectors for less-than. More...
|
|
|
mask_int16x8 | cmp_lt (int16x8 a, int16x8 b) |
| Compares the values of two signed int16x8 vectors for less-than. More...
|
|
mask_int16x16 | cmp_lt (int16x16 a, int16x16 b) |
| Compares the values of two signed int16x8 vectors for less-than. More...
|
|
|
mask_int16x8 | cmp_lt (uint16x8 a, uint16x8 b) |
| Compares the values of two unsigned int16x8 vectors for less-than. More...
|
|
mask_int16x16 | cmp_lt (uint16x16 a, uint16x16 b) |
| Compares the values of two unsigned int16x8 vectors for less-than. More...
|
|
|
mask_int32x4 | cmp_lt (int32x4 a, int32x4 b) |
| Compares the values of two signed int32x4 vectors for less-than. More...
|
|
mask_int32x8 | cmp_lt (int32x8 a, int32x8 b) |
| Compares the values of two signed int32x4 vectors for less-than. More...
|
|
|
mask_int32x4 | cmp_lt (uint32x4 a, uint32x4 b) |
| Compares the values of two unsigned int32x4 vectors for less-than. More...
|
|
mask_int32x8 | cmp_lt (uint32x8 a, uint32x8 b) |
| Compares the values of two unsigned int32x4 vectors for less-than. More...
|
|
|
mask_float32x4 | cmp_lt (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for less-than. More...
|
|
mask_float32x8 | cmp_lt (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for less-than. More...
|
|
|
mask_float64x2 | cmp_lt (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for less-than. More...
|
|
mask_float64x4 | cmp_lt (float64x4 a, float64x4 b) |
| Compares the values of two float64x2 vectors for less-than. More...
|
|
|
mask_float32x4 | cmp_le (float32x4 a, float32x4 b) |
| Compares the values of two float32x4 vectors for less-than or equal. More...
|
|
mask_float32x8 | cmp_le (float32x8 a, float32x8 b) |
| Compares the values of two float32x4 vectors for less-than or equal. More...
|
|
|
mask_float64x2 | cmp_le (float64x2 a, float64x2 b) |
| Compares the values of two float64x2 vectors for less-than or equal. More...
|
|
mask_float64x4 | cmp_le (float64x4 a, float64x4 b) |
| Compares the values of two float64x2 vectors for less-than or equal. More...
|
|
|
basic_int32x4 | to_int32x4 (float32x4 a) |
| Converts the values of a float32x4 vector into signed int32_t representation using truncation if only an inexact conversion can be performed. More...
|
|
basic_int32x8 | to_int32x8 (float32x8 a) |
| Converts the values of a float32x4 vector into signed int32_t representation using truncation if only an inexact conversion can be performed. More...
|
|
|
basic_int32x4 | to_int32x4 (float64x2 a) |
| Converts the values of a float64x2 vector into int32_t representation using truncation. More...
|
|
basic_int32x8 | to_int32x8 (float64x4 a) |
| Converts the values of a float64x2 vector into int32_t representation using truncation. More...
|
|
|
basic_int64x2 | to_int64x2 (int32x4 a) |
| Extends the values of a signed int32x4 vector to 64-bits. More...
|
|
basic_int64x4 | to_int64x4 (int32x8 a) |
| Extends the values of a signed int32x4 vector to 64-bits. More...
|
|
basic_int64x2 | to_int64x2 (uint32x4 a) |
| Extends the values of an unsigned int32x4 vector to 64-bits. More...
|
|
basic_int64x4 | to_int64x4 (uint32x8 a) |
| Extends the values of an unsigned int32x4 vector to 64-bits. More...
|
|
|
float32x4 | to_float32x4 (int32x4 a) |
| Converts 32-bit integer values to 32-bit float values. More...
|
|
float32x8 | to_float32x8 (int32x8 a) |
| Converts 32-bit integer values to 32-bit float values. More...
|
|
|
float32x4 | to_float32x4 (float64x2 a) |
| Converts 64-bit float values to 32-bit float values. More...
|
|
float32x8 | to_float32x8 (float64x4 a) |
| Converts 64-bit float values to 32-bit float values. More...
|
|
|
float64x2 | to_float64x2 (int32x4 a) |
| Converts the 32-bit integer values to 64-bit float values. More...
|
|
float64x4 | to_float64x4 (int32x8 a) |
| Converts the 32-bit integer values to 64-bit float values. More...
|
|
|
float64x2 | to_float64x2 (float32x4 a) |
| Converts the 32-bit float values to 64-bit float values. More...
|
|
float64x4 | to_float64x4 (float32x8 a) |
| Converts the 32-bit float values to 64-bit float values. More...
|
|
|
template<unsigned id> |
uint8_t | extract (basic_int8x16 a) |
| Extracts the id-th element from int8x16 vector. More...
|
|
template<unsigned id> |
int8_t | extract (int8x16 a) |
| Extracts the id-th element from int8x16 vector. More...
|
|
|
template<unsigned id> |
uint16_t | extract (basic_int16x8 a) |
| Extracts the id-th element from int16x8 vector. More...
|
|
template<unsigned id> |
int16_t | extract (int16x8 a) |
| Extracts the id-th element from int16x8 vector. More...
|
|
|
template<unsigned id> |
uint32_t | extract (basic_int32x4 a) |
| Extracts the id-th element from int32x4 vector. More...
|
|
template<unsigned id> |
int32_t | extract (int32x4 a) |
| Extracts the id-th element from int32x4 vector. More...
|
|
|
template<unsigned id> |
uint64_t | extract (basic_int64x2 a) |
| Extracts an element from int64x2 vector. More...
|
|
template<unsigned id> |
int64_t | extract (int64x2 a) |
| Extracts an element from int64x2 vector. More...
|
|
|
int256 | combine (int128 a, int128 b) |
| Combines two 128-bit vectors into a 256-bit vector. More...
|
|
float32x8 | combine (float32x4 a, float32x4 b) |
| Combines two 128-bit vectors into a 256-bit vector. More...
|
|
float64x4 | combine (float64x2 a, float64x2 b) |
| Combines two 128-bit vectors into a 256-bit vector. More...
|
|
|
template<int s0, int s1> |
basic_int8x16 | make_shuffle_bytes16_mask (basic_int8x16 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1> |
basic_int8x32 | make_shuffle_bytes16_mask (basic_int8x32 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3> |
basic_int8x16 | make_shuffle_bytes16_mask (basic_int8x16 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3> |
basic_int8x32 | make_shuffle_bytes16_mask (basic_int8x32 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7> |
basic_int8x16 | make_shuffle_bytes16_mask (basic_int8x16 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7> |
basic_int8x32 | make_shuffle_bytes16_mask (basic_int8x32 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7, int s8, int s9, int s10, int s11, int s12, int s13, int s14, int s15> |
basic_int8x16 | make_shuffle_bytes16_mask (basic_int8x16 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7, int s8, int s9, int s10, int s11, int s12, int s13, int s14, int s15> |
basic_int8x32 | make_shuffle_bytes16_mask (basic_int8x32 &mask) |
| Makes a mask to shuffle an int8x16 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1> |
basic_int16x8 | make_shuffle_bytes16_mask (basic_int16x8 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1> |
basic_int16x16 | make_shuffle_bytes16_mask (basic_int16x16 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3> |
basic_int16x8 | make_shuffle_bytes16_mask (basic_int16x8 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3> |
basic_int16x16 | make_shuffle_bytes16_mask (basic_int16x16 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7> |
basic_int16x8 | make_shuffle_bytes16_mask (basic_int16x8 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3, int s4, int s5, int s6, int s7> |
basic_int16x16 | make_shuffle_bytes16_mask (basic_int16x16 &mask) |
| Makes a mask to shuffle an int16x8 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1> |
basic_int32x4 | make_shuffle_bytes16_mask (basic_int32x4 &mask) |
| Makes a mask to shuffle an int32x4 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1> |
basic_int32x8 | make_shuffle_bytes16_mask (basic_int32x8 &mask) |
| Makes a mask to shuffle an int32x4 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1, int s2, int s3> |
basic_int32x4 | make_shuffle_bytes16_mask (basic_int32x4 &mask) |
| Makes a mask to shuffle an int32x4 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1, int s2, int s3> |
basic_int32x8 | make_shuffle_bytes16_mask (basic_int32x8 &mask) |
| Makes a mask to shuffle an int32x4 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
template<int s0, int s1> |
basic_int64x2 | make_shuffle_bytes16_mask (basic_int64x2 &mask) |
| Makes a mask to shuffle an int64x2 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
template<int s0, int s1> |
basic_int64x4 | make_shuffle_bytes16_mask (basic_int64x4 &mask) |
| Makes a mask to shuffle an int64x2 vector using permute_bytes16 , shuffle_bytes16 , permute_zbytes16 or shuffle_zbytes16 functions. More...
|
|
|
mask_float32x4 | isnan (float32x4 a) |
| Checks whether elements in a are IEEE754 NaN. More...
|
|
mask_float32x8 | isnan (float32x8 a) |
| Checks whether elements in a are IEEE754 NaN. More...
|
|
|
mask_float64x2 | isnan (float64x2 a) |
| Checks whether elements in a are IEEE754 NaN. More...
|
|
mask_float64x4 | isnan (float64x4 a) |
| Checks whether elements in a are IEEE754 NaN. More...
|
|
|
mask_float32x4 | isnan2 (float32x4 a, float32x4 b) |
| Checks whether corresponding elements in either a or b are IEEE754 NaN. More...
|
|
mask_float32x8 | isnan2 (float32x8 a, float32x8 b) |
| Checks whether corresponding elements in either a or b are IEEE754 NaN. More...
|
|
|
mask_float64x2 | isnan2 (float64x2 a, float64x2 b) |
| Checks whether corresponding elements in either a or b are IEEE754 NaN. More...
|
|
mask_float64x4 | isnan2 (float64x4 a, float64x4 b) |
| Checks whether corresponding elements in either a or b are IEEE754 NaN. More...
|
|
|
float32x4 | rcp_e (float32x4 a) |
| Computes approximate reciprocal. More...
|
|
float32x8 | rcp_e (float32x8 a) |
| Computes approximate reciprocal. More...
|
|
|
float32x4 | rcp_rh (float32x4 x, float32x4 a) |
| Computes one Newton-Raphson iteration for reciprocal. More...
|
|
float32x8 | rcp_rh (float32x8 x, float32x8 a) |
| Computes one Newton-Raphson iteration for reciprocal. More...
|
|
|
float32x4 | div (float32x4 a, float32x4 b) |
| Divides the values of two vectors. More...
|
|
float32x8 | div (float32x8 a, float32x8 b) |
| Divides the values of two vectors. More...
|
|
|
float64x2 | div (float64x2 a, float64x2 b) |
| Divides the values of two vectors. More...
|
|
float64x4 | div (float64x4 a, float64x4 b) |
| Divides the values of two vectors. More...
|
|
|
float32x4 | rsqrt_e (float32x4 a) |
| Computes approximate reciprocal square root. More...
|
|
float32x8 | rsqrt_e (float32x8 a) |
| Computes approximate reciprocal square root. More...
|
|
|
float32x4 | rsqrt_rh (float32x4 x, float32x4 a) |
| Computes one Newton-Raphson iteration for inverse of square root. More...
|
|
float32x8 | rsqrt_rh (float32x8 x, float32x8 a) |
| Computes one Newton-Raphson iteration for inverse of square root. More...
|
|
|
float32x4 | sqrt (float32x4 a) |
| Computes square root. More...
|
|
float32x8 | sqrt (float32x8 a) |
| Computes square root. More...
|
|
|
float64x2 | sqrt (float64x2 a) |
| Computes square root. More...
|
|
float64x4 | sqrt (float64x4 a) |
| Computes square root. More...
|
|
|
float32x4 | min (float32x4 a, float32x4 b) |
| Computes minimum of the values in two vectors. More...
|
|
float32x8 | min (float32x8 a, float32x8 b) |
| Computes minimum of the values in two vectors. More...
|
|
|
float64x2 | min (float64x2 a, float64x2 b) |
| Computes minima of the values in two vectors. More...
|
|
float64x4 | min (float64x4 a, float64x4 b) |
| Computes minima of the values in two vectors. More...
|
|
|
float32x4 | max (float32x4 a, float32x4 b) |
| Computes maxima of the values of two vectors. More...
|
|
float32x8 | max (float32x8 a, float32x8 b) |
| Computes maxima of the values of two vectors. More...
|
|
|
float64x2 | max (float64x2 a, float64x2 b) |
| Computes maxima of the values of two vectors. More...
|
|
float64x4 | max (float64x4 a, float64x4 b) |
| Computes maxima of the values of two vectors. More...
|
|
|
float32x4 | floor (float32x4 a) |
| Rounds the values of a vector towards negative infinity. More...
|
|
float32x8 | floor (float32x8 a) |
| Rounds the values of a vector towards negative infinity. More...
|
|
float32x4 | ceil (float32x4 a) |
| Rounds the values of a vector towards positive infinity. More...
|
|
float32x8 | ceil (float32x8 a) |
| Rounds the values of a vector towards positive infinity. More...
|
|
|
float32x4 | trunc (float32x4 a) |
| Rounds the values of a vector towards zero. More...
|
|
float32x8 | trunc (float32x8 a) |
| Rounds the values of a vector towards zero. More...
|
|
|
float64x2 | abs (float64x2 a) |
| Computes absolute value of floating point values. More...
|
|
float64x4 | abs (float64x4 a) |
| Computes absolute value of floating point values. More...
|
|
|
float32x4 | sign (float32x4 a) |
| Extracts sign bits from the values in float32x4 vector. More...
|
|
float32x8 | sign (float32x8 a) |
| Extracts sign bits from the values in float32x4 vector. More...
|
|
|
float64x2 | sign (float64x2 a) |
| Extracts sign bit from the values in float64x2 vector. More...
|
|
float64x4 | sign (float64x4 a) |
| Extracts sign bit from the values in float64x2 vector. More...
|
|
|
float32x4 | add (float32x4 a, float32x4 b) |
| Adds the values of two vectors. More...
|
|
float32x8 | add (float32x8 a, float32x8 b) |
| Adds the values of two vectors. More...
|
|
|
float64x2 | add (float64x2 a, float64x2 b) |
| Adds the values of two vectors. More...
|
|
float64x4 | add (float64x4 a, float64x4 b) |
| Adds the values of two vectors. More...
|
|
|
float32x4 | sub (float32x4 a, float32x4 b) |
| Subtracts the values of two vectors. More...
|
|
float32x8 | sub (float32x8 a, float32x8 b) |
| Subtracts the values of two vectors. More...
|
|
|
float64x2 | sub (float64x2 a, float64x2 b) |
| Subtracts the values of two vectors. More...
|
|
float64x4 | sub (float64x4 a, float64x4 b) |
| Subtracts the values of two vectors. More...
|
|
|
float32x4 | neg (float32x4 a) |
| Negates the values of a float32x4 vector. More...
|
|
float32x8 | neg (float32x8 a) |
| Negates the values of a float32x4 vector. More...
|
|
|
float64x2 | neg (float64x2 a) |
| Negates the values of a vector. More...
|
|
float64x4 | neg (float64x4 a) |
| Negates the values of a vector. More...
|
|
|
float32x4 | mul (float32x4 a, float32x4 b) |
| Multiplies the values of two vectors. More...
|
|
float32x8 | mul (float32x8 a, float32x8 b) |
| Multiplies the values of two vectors. More...
|
|
|
float64x2 | mul (float64x2 a, float64x2 b) |
| Multiplies the values of two vectors. More...
|
|
float64x4 | mul (float64x4 a, float64x4 b) |
| Multiplies the values of two vectors. More...
|
|
|
float32x4 | fmadd (float32x4 a, float32x4 b, float32x4 c) |
| Performs a fused multiply-add operation. More...
|
|
float32x8 | fmadd (float32x8 a, float32x8 b, float32x8 c) |
| Performs a fused multiply-add operation. More...
|
|
float64x2 | fmadd (float64x2 a, float64x2 b, float64x2 c) |
| Performs a fused multiply-add operation. More...
|
|
float64x4 | fmadd (float64x4 a, float64x4 b, float64x4 c) |
| Performs a fused multiply-add operation. More...
|
|
|
float32x4 | fmsub (float32x4 a, float32x4 b, float32x4 c) |
| Performs a fused multiply-subtract operation. More...
|
|
float32x8 | fmsub (float32x8 a, float32x8 b, float32x8 c) |
| Performs a fused multiply-subtract operation. More...
|
|
float64x2 | fmsub (float64x2 a, float64x2 b, float64x2 c) |
| Performs a fused multiply-subtract operation. More...
|
|
float64x4 | fmsub (float64x4 a, float64x4 b, float64x4 c) |
| Performs a fused multiply-subtract operation. More...
|
|
|
int8x16 | min (int8x16 a, int8x16 b) |
| Computes minimum of signed 8-bit values. More...
|
|
int8x32 | min (int8x32 a, int8x32 b) |
| Computes minimum of signed 8-bit values. More...
|
|
|
uint8x16 | min (uint8x16 a, uint8x16 b) |
| Computes minimum of the unsigned 8-bit values. More...
|
|
uint8x32 | min (uint8x32 a, uint8x32 b) |
| Computes minimum of the unsigned 8-bit values. More...
|
|
|
int16x8 | min (int16x8 a, int16x8 b) |
| Computes minimum of the signed 16-bit values. More...
|
|
int16x16 | min (int16x16 a, int16x16 b) |
| Computes minimum of the signed 16-bit values. More...
|
|
|
uint16x8 | min (uint16x8 a, uint16x8 b) |
| Computes minimum of the unsigned 16-bit values. More...
|
|
uint16x16 | min (uint16x16 a, uint16x16 b) |
| Computes minimum of the unsigned 16-bit values. More...
|
|
|
int32x4 | min (int32x4 a, int32x4 b) |
| Computes minimum of the signed 32-bit values. More...
|
|
int32x8 | min (int32x8 a, int32x8 b) |
| Computes minimum of the signed 32-bit values. More...
|
|
|
uint32x4 | min (uint32x4 a, uint32x4 b) |
| Computes minimum of the unsigned 32-bit values. More...
|
|
uint32x8 | min (uint32x8 a, uint32x8 b) |
| Computes minimum of the unsigned 32-bit values. More...
|
|
|
int8x16 | max (int8x16 a, int8x16 b) |
| Computes maximum of the signed 8-bit values. More...
|
|
int8x32 | max (int8x32 a, int8x32 b) |
| Computes maximum of the signed 8-bit values. More...
|
|
|
uint8x16 | max (uint8x16 a, uint8x16 b) |
| Computes maximum of the unsigned 8-bit values. More...
|
|
uint8x32 | max (uint8x32 a, uint8x32 b) |
| Computes maximum of the unsigned 8-bit values. More...
|
|
|
int16x8 | max (int16x8 a, int16x8 b) |
| Computes maximum of the signed 16-bit values. More...
|
|
int16x16 | max (int16x16 a, int16x16 b) |
| Computes maximum of the signed 16-bit values. More...
|
|
|
uint16x8 | max (uint16x8 a, uint16x8 b) |
| Computes maximum of the unsigned 16-bit values. More...
|
|
uint16x16 | max (uint16x16 a, uint16x16 b) |
| Computes maximum of the unsigned 16-bit values. More...
|
|
|
int32x4 | max (int32x4 a, int32x4 b) |
| Computes maximum of the signed 32-bit values. More...
|
|
int32x8 | max (int32x8 a, int32x8 b) |
| Computes maximum of the signed 32-bit values. More...
|
|
|
uint32x4 | max (uint32x4 a, uint32x4 b) |
| Computes maximum of the unsigned 32-bit values. More...
|
|
uint32x8 | max (uint32x8 a, uint32x8 b) |
| Computes maximum of the unsigned 32-bit values. More...
|
|
|
uint8x16 | avg (uint8x16 a, uint8x16 b) |
| Computes rounded average of the unsigned 8-bit values. More...
|
|
uint8x32 | avg (uint8x32 a, uint8x32 b) |
| Computes rounded average of the unsigned 8-bit values. More...
|
|
|
int8x16 | avg (int8x16 a, int8x16 b) |
| Computes rounded average of signed 8-bit values. More...
|
|
int8x32 | avg (int8x32 a, int8x32 b) |
| Computes rounded average of signed 8-bit values. More...
|
|
|
uint16x8 | avg (uint16x8 a, uint16x8 b) |
| Computes rounded average of unsigned 16-bit values. More...
|
|
uint16x16 | avg (uint16x16 a, uint16x16 b) |
| Computes rounded average of unsigned 16-bit values. More...
|
|
|
int16x8 | avg (int16x8 a, int16x8 b) |
| Computes rounded average of signed 16-bit values. More...
|
|
int16x16 | avg (int16x16 a, int16x16 b) |
| Computes rounded average of signed 16-bit values. More...
|
|
|
uint32x4 | avg (uint32x4 a, uint32x4 b) |
| Computes rounded average of unsigned 32-bit values. More...
|
|
uint32x8 | avg (uint32x8 a, uint32x8 b) |
| Computes rounded average of unsigned 32-bit values. More...
|
|
|
int32x4 | avg (int32x4 a, int32x4 b) |
| Computes rounded average of signed 32-bit values. More...
|
|
int32x8 | avg (int32x8 a, int32x8 b) |
| Computes rounded average of signed 32-bit values. More...
|
|
|
uint8x16 | avg_trunc (uint8x16 a, uint8x16 b) |
| Computes truncated average of the unsigned 8-bit values. More...
|
|
uint8x32 | avg_trunc (uint8x32 a, uint8x32 b) |
| Computes truncated average of the unsigned 8-bit values. More...
|
|
|
int8x16 | avg_trunc (int8x16 a, int8x16 b) |
| Computes truncated average of signed 8-bit values. More...
|
|
int8x32 | avg_trunc (int8x32 a, int8x32 b) |
| Computes truncated average of signed 8-bit values. More...
|
|
|
uint16x8 | avg_trunc (uint16x8 a, uint16x8 b) |
| Computes truncated average of unsigned 16-bit values. More...
|
|
uint16x16 | avg_trunc (uint16x16 a, uint16x16 b) |
| Computes truncated average of unsigned 16-bit values. More...
|
|
|
int16x8 | avg_trunc (int16x8 a, int16x8 b) |
| Computes truncated average of signed 16-bit values. More...
|
|
int16x16 | avg_trunc (int16x16 a, int16x16 b) |
| Computes truncated average of signed 16-bit values. More...
|
|
|
uint32x4 | avg_trunc (uint32x4 a, uint32x4 b) |
| Computes truncated average of unsigned 32-bit values. More...
|
|
uint32x8 | avg_trunc (uint32x8 a, uint32x8 b) |
| Computes truncated average of unsigned 32-bit values. More...
|
|
|
int32x4 | avg_trunc (int32x4 a, int32x4 b) |
| Computes truncated average of signed 32-bit values. More...
|
|
int32x8 | avg_trunc (int32x8 a, int32x8 b) |
| Computes truncated average of signed 32-bit values. More...
|
|
|
uint8x16 | abs (int8x16 a) |
| Computes absolute value of 8-bit integer values. More...
|
|
uint8x32 | abs (int8x32 a) |
| Computes absolute value of 8-bit integer values. More...
|
|
|
uint16x8 | abs (int16x8 a) |
| Computes absolute value of 16-bit integer values. More...
|
|
uint16x16 | abs (int16x16 a) |
| Computes absolute value of 16-bit integer values. More...
|
|
|
uint32x4 | abs (int32x4 a) |
| Computes absolute value of 32-bit integer values. More...
|
|
uint32x8 | abs (int32x8 a) |
| Computes absolute value of 32-bit integer values. More...
|
|
|
uint64x2 | abs (int64x2 a) |
| Computes absolute value of 64-bit integer values. More...
|
|
uint64x4 | abs (int64x4 a) |
| Computes absolute value of 64-bit integer values. More...
|
|
|
template<unsigned P> |
uint8x16 | div_p (uint8x16 num, uint8x16 den) |
| Divides one 8-bit unsigned number by another. More...
|
|
template<unsigned P> |
uint16x8 | div_p (uint16x8 num, uint16x8 den) |
| Divides one 16-bit unsigned number by another. More...
|
|
|
basic_int16x8 | add (basic_int16x8 a, basic_int16x8 b) |
| Adds 16-bit integer values. More...
|
|
basic_int16x16 | add (basic_int16x16 a, basic_int16x16 b) |
| Adds 16-bit integer values. More...
|
|
|
basic_int32x4 | add (basic_int32x4 a, basic_int32x4 b) |
| Adds 32-bit integer values. More...
|
|
basic_int32x8 | add (basic_int32x8 a, basic_int32x8 b) |
| Adds 32-bit integer values. More...
|
|
|
basic_int64x2 | add (basic_int64x2 a, basic_int64x2 b) |
| Adds 64-bit integer values. More...
|
|
basic_int64x4 | add (basic_int64x4 a, basic_int64x4 b) |
| Adds 64-bit integer values. More...
|
|
|
int8x16 | adds (int8x16 a, int8x16 b) |
| Adds and saturates signed 8-bit integer values. More...
|
|
int8x32 | adds (int8x32 a, int8x32 b) |
| Adds and saturates signed 8-bit integer values. More...
|
|
|
int16x8 | adds (int16x8 a, int16x8 b) |
| Adds and saturates signed 16-bit integer values. More...
|
|
int16x16 | adds (int16x16 a, int16x16 b) |
| Adds and saturates signed 16-bit integer values. More...
|
|
|
uint8x16 | adds (uint8x16 a, uint8x16 b) |
| Adds and saturates unsigned 8-bit integer values. More...
|
|
uint8x32 | adds (uint8x32 a, uint8x32 b) |
| Adds and saturates unsigned 8-bit integer values. More...
|
|
|
uint16x8 | adds (uint16x8 a, uint16x8 b) |
| Adds and saturates unsigned 16-bit integer values. More...
|
|
uint16x16 | adds (uint16x16 a, uint16x16 b) |
| Adds and saturates unsigned 16-bit integer values. More...
|
|
|
basic_int8x16 | sub (basic_int8x16 a, basic_int8x16 b) |
| Subtracts 8-bit integer values. More...
|
|
basic_int8x32 | sub (basic_int8x32 a, basic_int8x32 b) |
| Subtracts 8-bit integer values. More...
|
|
|
basic_int16x8 | sub (basic_int16x8 a, basic_int16x8 b) |
| Subtracts 16-bit integer values. More...
|
|
basic_int16x16 | sub (basic_int16x16 a, basic_int16x16 b) |
| Subtracts 16-bit integer values. More...
|
|
|
basic_int32x4 | sub (basic_int32x4 a, basic_int32x4 b) |
| Subtracts 32-bit integer values. More...
|
|
basic_int32x8 | sub (basic_int32x8 a, basic_int32x8 b) |
| Subtracts 32-bit integer values. More...
|
|
|
basic_int64x2 | sub (basic_int64x2 a, basic_int64x2 b) |
| Subtracts 64-bit integer values. More...
|
|
basic_int64x4 | sub (basic_int64x4 a, basic_int64x4 b) |
| Subtracts 64-bit integer values. More...
|
|
|
int8x16 | subs (int8x16 a, int8x16 b) |
| Subtracts and saturates signed 8-bit integer values. More...
|
|
int8x32 | subs (int8x32 a, int8x32 b) |
| Subtracts and saturates signed 8-bit integer values. More...
|
|
|
int16x8 | subs (int16x8 a, int16x8 b) |
| Subtracts and saturates signed 16-bit integer values. More...
|
|
int16x16 | subs (int16x16 a, int16x16 b) |
| Subtracts and saturates signed 16-bit integer values. More...
|
|
|
uint8x16 | subs (uint8x16 a, uint8x16 b) |
| Subtracts and saturates unsigned 8-bit integer values. More...
|
|
uint8x32 | subs (uint8x32 a, uint8x32 b) |
| Subtracts and saturates unsigned 8-bit integer values. More...
|
|
|
uint16x8 | subs (uint16x8 a, uint16x8 b) |
| Subtracts and saturates unsigned 16-bit integer values. More...
|
|
uint16x16 | subs (uint16x16 a, uint16x16 b) |
| Subtracts and saturates unsigned 16-bit integer values. More...
|
|
|
int8x16 | neg (int8x16 a) |
| Negates signed 8-bit values. More...
|
|
int8x32 | neg (int8x32 a) |
| Negates signed 8-bit values. More...
|
|
|
int16x8 | neg (int16x8 a) |
| Negates signed 16-bit values. More...
|
|
int16x16 | neg (int16x16 a) |
| Negates signed 16-bit values. More...
|
|
|
int32x4 | neg (int32x4 a) |
| Negates signed 32-bit values. More...
|
|
int32x8 | neg (int32x8 a) |
| Negates signed 32-bit values. More...
|
|
|
int64x2 | neg (int64x2 a) |
| Negates signed 64-bit values. More...
|
|
int64x4 | neg (int64x4 a) |
| Negates signed 64-bit values. More...
|
|
|
basic_int16x8 | mul_lo (basic_int16x8 a, basic_int16x8 b) |
| Multiplies 16-bit values and returns the lower part of the multiplication. More...
|
|
basic_int16x16 | mul_lo (basic_int16x16 a, basic_int16x16 b) |
| Multiplies 16-bit values and returns the lower part of the multiplication. More...
|
|
|
int16x8 | mul_hi (int16x8 a, int16x8 b) |
| Multiplies signed 16-bit values and returns the higher half of the result. More...
|
|
int16x16 | mul_hi (int16x16 a, int16x16 b) |
| Multiplies signed 16-bit values and returns the higher half of the result. More...
|
|
|
uint16x8 | mul_hi (uint16x8 a, uint16x8 b) |
| Multiplies unsigned 16-bit values and returns the higher half of the result. More...
|
|
uint16x16 | mul_hi (uint16x16 a, uint16x16 b) |
| Multiplies unsigned 16-bit values and returns the higher half of the result. More...
|
|
|
int128 | mul_lo (basic_int32x4 a, basic_int32x4 b) |
| Multiplies 32-bit values and returns the lower half of the result. More...
|
|
basic_int32x8 | mul_lo (basic_int32x8 a, basic_int32x8 b) |
| Multiplies 32-bit values and returns the lower half of the result. More...
|
|
|
int32x4 | mull_lo (int16x8 a, int16x8 b) |
| Multiplies signed 16-bit values in the lower halves of the vectors and expands the results to 32 bits. More...
|
|
int32x8 | mull_lo (int16x16 a, int16x16 b) |
| Multiplies signed 16-bit values in the lower halves of the vectors and expands the results to 32 bits. More...
|
|
|
uint32x4 | mull_lo (uint16x8 a, uint16x8 b) |
| Multiplies unsigned 16-bit values in the lower halves of the vectors and expands the results to 32 bits. More...
|
|
uint32x8 | mull_lo (uint16x16 a, uint16x16 b) |
| Multiplies unsigned 16-bit values in the lower halves of the vectors and expands the results to 32 bits. More...
|
|
|
int32x4 | mull_hi (int16x8 a, int16x8 b) |
| Multiplies signed 16-bit values in the higher halves of the vectors and expands the results to 32 bits. More...
|
|
int32x8 | mull_hi (int16x16 a, int16x16 b) |
| Multiplies signed 16-bit values in the higher halves of the vectors and expands the results to 32 bits. More...
|
|
|
uint32x4 | mull_hi (uint16x8 a, uint16x8 b) |
| Multiplies unsigned 16-bit values in the higher halves of the vectors and expands the results to 32 bits. More...
|
|
uint32x8 | mull_hi (uint16x16 a, uint16x16 b) |
| Multiplies unsigned 16-bit values in the higher halves of the vectors and expands the results to 32 bits. More...
|
|
|
int64x2 | mull_lo (int32x4 a, int32x4 b) |
| Multiplies signed 32-bit values in the lower halves of the vectors and expands the results to 64 bits. More...
|
|
int64x4 | mull_lo (int32x8 a, int32x8 b) |
| Multiplies signed 32-bit values in the lower halves of the vectors and expands the results to 64 bits. More...
|
|
|
uint64x2 | mull_lo (uint32x4 a, uint32x4 b) |
| Multiplies unsigned 32-bit values in the lower halves of the vectors and expands the results to 64 bits. More...
|
|
uint64x4 | mull_lo (uint32x8 a, uint32x8 b) |
| Multiplies unsigned 32-bit values in the lower halves of the vectors and expands the results to 64 bits. More...
|
|
|
int64x2 | mull_hi (int32x4 a, int32x4 b) |
| Multiplies signed 32-bit values in the higher halves of the vectors and expands the results to 64 bits. More...
|
|
int64x4 | mull_hi (int32x8 a, int32x8 b) |
| Multiplies signed 32-bit values in the higher halves of the vectors and expands the results to 64 bits. More...
|
|
|
uint64x2 | mull_hi (uint32x4 a, uint32x4 b) |
| Multiplies unsigned 32-bit values in the higher halves of the vectors and expands the results to 64 bits. More...
|
|
uint64x4 | mull_hi (uint32x8 a, uint32x8 b) |
| Multiplies unsigned 32-bit values in the higher halves of the vectors and expands the results to 64 bits. More...
|
|
|
uint8x16 | shift_r (uint8x16 a, unsigned count) |
| Shifts unsigned 8-bit values right by count bits while shifting in zeros. More...
|
|
uint8x32 | shift_r (uint8x32 a, unsigned count) |
| Shifts unsigned 8-bit values right by count bits while shifting in zeros. More...
|
|
|
int16x8 | shift_r (int16x8 a, unsigned count) |
| Shifts signed 16-bit values right by count bits while shifting in the sign bit. More...
|
|
int16x16 | shift_r (int16x16 a, unsigned count) |
| Shifts signed 16-bit values right by count bits while shifting in the sign bit. More...
|
|
|
uint16x8 | shift_r (uint16x8 a, unsigned count) |
| Shifts unsigned 16-bit values right by count bits while shifting in zeros. More...
|
|
uint16x16 | shift_r (uint16x16 a, unsigned count) |
| Shifts unsigned 16-bit values right by count bits while shifting in zeros. More...
|
|
|
int32x4 | shift_r (int32x4 a, unsigned count) |
| Shifts signed 32-bit values right by count bits while shifting in the sign bit. More...
|
|
int32x8 | shift_r (int32x8 a, unsigned count) |
| Shifts signed 32-bit values right by count bits while shifting in the sign bit. More...
|
|
|
uint32x4 | shift_r (uint32x4 a, unsigned count) |
| Shifts unsigned 32-bit values right by count bits while shifting in zeros. More...
|
|
uint32x8 | shift_r (uint32x8 a, unsigned count) |
| Shifts unsigned 32-bit values right by count bits while shifting in zeros. More...
|
|
|
int64x2 | shift_r (int64x2 a, unsigned count) |
| Shifts signed 64-bit values right by count bits while shifting in the sign bit. More...
|
|
int64x4 | shift_r (int64x4 a, unsigned count) |
| Shifts signed 64-bit values right by count bits while shifting in the sign bit. More...
|
|
|
uint64x2 | shift_r (uint64x2 a, unsigned count) |
| Shifts unsigned 64-bit values right by count bits while shifting in zeros. More...
|
|
uint64x4 | shift_r (uint64x4 a, unsigned count) |
| Shifts unsigned 64-bit values right by count bits while shifting in zeros. More...
|
|
|
basic_int8x16 | shift_l (basic_int8x16 a, unsigned count) |
| Shifts 8-bit values left by count bits while shifting in zeros. More...
|
|
basic_int8x32 | shift_l (basic_int8x32 a, unsigned count) |
| Shifts 8-bit values left by count bits while shifting in zeros. More...
|
|
|
basic_int16x8 | shift_l (basic_int16x8 a, unsigned count) |
| Shifts 16-bit values left by count bits while shifting in zeros. More...
|
|
basic_int16x16 | shift_l (basic_int16x16 a, unsigned count) |
| Shifts 16-bit values left by count bits while shifting in zeros. More...
|
|
|
basic_int32x4 | shift_l (basic_int32x4 a, unsigned count) |
| Shifts 32-bit values left by count bits while shifting in zeros. More...
|
|
basic_int32x8 | shift_l (basic_int32x8 a, unsigned count) |
| Shifts 32-bit values left by count bits while shifting in zeros. More...
|
|
|
basic_int64x2 | shift_l (basic_int64x2 a, unsigned count) |
| Shifts 64-bit values left by count bits while shifting in zeros. More...
|
|
basic_int64x4 | shift_l (basic_int64x4 a, unsigned count) |
| Shifts 64-bit values left by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
int8x16 | shift_r (int8x16 a) |
| Shifts signed 8-bit values right by count bits while shifting in the sign bit. More...
|
|
template<unsigned count> |
int8x32 | shift_r (int8x32 a) |
| Shifts signed 8-bit values right by count bits while shifting in the sign bit. More...
|
|
|
template<unsigned count> |
uint8x16 | shift_r (uint8x16 a) |
| Shifts unsigned 8-bit values right by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
uint8x32 | shift_r (uint8x32 a) |
| Shifts unsigned 8-bit values right by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
int16x8 | shift_r (int16x8 a) |
| Shifts signed 16-bit values right by count bits while shifting in the sign bit. More...
|
|
template<unsigned count> |
int16x16 | shift_r (int16x16 a) |
| Shifts signed 16-bit values right by count bits while shifting in the sign bit. More...
|
|
|
template<unsigned count> |
uint16x8 | shift_r (uint16x8 a) |
| Shifts unsigned 16-bit values right by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
uint16x16 | shift_r (uint16x16 a) |
| Shifts unsigned 16-bit values right by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
int32x4 | shift_r (int32x4 a) |
| Shifts signed 32-bit values right by count bits while shifting in the sign bit. More...
|
|
template<unsigned count> |
int32x8 | shift_r (int32x8 a) |
| Shifts signed 32-bit values right by count bits while shifting in the sign bit. More...
|
|
|
template<unsigned count> |
uint32x4 | shift_r (uint32x4 a) |
| Shifts unsigned 32-bit values right by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
uint32x8 | shift_r (uint32x8 a) |
| Shifts unsigned 32-bit values right by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
int64x2 | shift_r (int64x2 a) |
| Shifts signed 64-bit values right by count bits while shifting in the sign bit. More...
|
|
template<unsigned count> |
int64x4 | shift_r (int64x4 a) |
| Shifts signed 64-bit values right by count bits while shifting in the sign bit. More...
|
|
|
template<unsigned count> |
uint64x2 | shift_r (uint64x2 a) |
| Shifts unsigned 64-bit values right by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
uint64x4 | shift_r (uint64x4 a) |
| Shifts unsigned 64-bit values right by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
basic_int8x16 | shift_l (basic_int8x16 a) |
| Shifts 8-bit values left by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
basic_int8x32 | shift_l (basic_int8x32 a) |
| Shifts 8-bit values left by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
basic_int16x8 | shift_l (basic_int16x8 a) |
| Shifts 16-bit values left by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
basic_int16x16 | shift_l (basic_int16x16 a) |
| Shifts 16-bit values left by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
basic_int32x4 | shift_l (basic_int32x4 a) |
| Shifts 32-bit values left by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
basic_int32x8 | shift_l (basic_int32x8 a) |
| Shifts 32-bit values left by count bits while shifting in zeros. More...
|
|
|
template<unsigned count> |
basic_int64x2 | shift_l (basic_int64x2 a) |
| Shifts 64-bit values left by count bits while shifting in zeros. More...
|
|
template<unsigned count> |
basic_int64x4 | shift_l (basic_int64x4 a) |
| Shifts 64-bit values left by count bits while shifting in zeros. More...
|
|
|
basic_int8x16 | load_u (basic_int8x16 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int16x8 | load_u (basic_int16x8 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int32x4 | load_u (basic_int32x4 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int64x2 | load_u (basic_int64x2 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
float32x4 | load_u (float32x4 &a, const float *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
float64x2 | load_u (float64x2 &a, const double *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int8x32 | load_u (basic_int8x32 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int16x16 | load_u (basic_int16x16 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int32x8 | load_u (basic_int32x8 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
basic_int64x4 | load_u (basic_int64x4 &a, const void *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
float32x8 | load_u (float32x8 &a, const float *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
float64x4 | load_u (float64x4 &a, const double *p) |
| Loads a 128-bit or 256-bit integer, 32-bit or 64-bit float vector from an unaligned memory location. More...
|
|
|
void | load_packed2 (basic_int8x16 &a, basic_int8x16 &b, const void *p) |
| Loads 8-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (basic_int8x32 &a, basic_int8x32 &b, const void *p) |
| Loads 8-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
|
void | load_packed2 (basic_int16x8 &a, basic_int16x8 &b, const void *p) |
| Loads 16-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (basic_int16x16 &a, basic_int16x16 &b, const void *p) |
| Loads 16-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
|
void | load_packed2 (basic_int32x4 &a, basic_int32x4 &b, const void *p) |
| Loads 32-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (basic_int32x8 &a, basic_int32x8 &b, const void *p) |
| Loads 32-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
|
void | load_packed2 (basic_int64x2 &a, basic_int64x2 &b, const void *p) |
| Loads 64-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (basic_int64x4 &a, basic_int64x4 &b, const void *p) |
| Loads 64-bit values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
|
void | load_packed2 (float64x2 &a, float64x2 &b, const double *p) |
| Loads 64-bit float values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
void | load_packed2 (float64x4 &a, float64x4 &b, const double *p) |
| Loads 64-bit float values packed in pairs, de-interleaves them and stores the result into two vectors. More...
|
|
|
void | load_packed3 (basic_int8x16 &a, basic_int8x16 &b, basic_int8x16 &c, const void *p) |
| Loads 8-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (basic_int8x32 &a, basic_int8x32 &b, basic_int8x32 &c, const void *p) |
| Loads 8-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed3 (basic_int16x8 &a, basic_int16x8 &b, basic_int16x8 &c, const void *p) |
| Loads 16-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (basic_int16x16 &a, basic_int16x16 &b, basic_int16x16 &c, const void *p) |
| Loads 16-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed3 (basic_int32x4 &a, basic_int32x4 &b, basic_int32x4 &c, const void *p) |
| Loads 32-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (basic_int32x8 &a, basic_int32x8 &b, basic_int32x8 &c, const void *p) |
| Loads 32-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed3 (basic_int64x2 &a, basic_int64x2 &b, basic_int64x2 &c, const void *p) |
| Loads 64-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (basic_int64x4 &a, basic_int64x4 &b, basic_int64x4 &c, const void *p) |
| Loads 64-bit values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed3 (float32x4 &a, float32x4 &b, float32x4 &c, const float *p) |
| Loads 32-bit floating point values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (float32x8 &a, float32x8 &b, float32x8 &c, const float *p) |
| Loads 32-bit floating point values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed3 (float64x2 &a, float64x2 &b, float64x2 &c, const double *p) |
| Loads 64-bit floating point values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
void | load_packed3 (float64x4 &a, float64x4 &b, float64x4 &c, const double *p) |
| Loads 64-bit floating point values packed in triplets, de-interleaves them and stores the result into three vectors. More...
|
|
|
void | load_packed4 (basic_int8x16 &a, basic_int8x16 &b, basic_int8x16 &c, basic_int8x16 &d, const void *p) |
| Loads 8-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (basic_int8x32 &a, basic_int8x32 &b, basic_int8x32 &c, basic_int8x32 &d, const void *p) |
| Loads 8-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | load_packed4 (basic_int16x8 &a, basic_int16x8 &b, basic_int16x8 &c, basic_int16x8 &d, const void *p) |
| Loads 16-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (basic_int16x16 &a, basic_int16x16 &b, basic_int16x16 &c, basic_int16x16 &d, const void *p) |
| Loads 16-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | load_packed4 (basic_int32x4 &a, basic_int32x4 &b, basic_int32x4 &c, basic_int32x4 &d, const void *p) |
| Loads 32-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (basic_int32x8 &a, basic_int32x8 &b, basic_int32x8 &c, basic_int32x8 &d, const void *p) |
| Loads 32-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | load_packed4 (basic_int64x2 &a, basic_int64x2 &b, basic_int64x2 &c, basic_int64x2 &d, const void *p) |
| Loads 64-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (basic_int64x4 &a, basic_int64x4 &b, basic_int64x4 &c, basic_int64x4 &d, const void *p) |
| Loads 64-bit values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | load_packed4 (float32x4 &a, float32x4 &b, float32x4 &c, float32x4 &d, const float *p) |
| Loads 32-bit floating-point values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (float32x8 &a, float32x8 &b, float32x8 &c, float32x8 &d, const float *p) |
| Loads 32-bit floating-point values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | load_packed4 (float64x2 &a, float64x2 &b, float64x2 &c, float64x2 &d, const double *p) |
| Loads 64-bit floating-point values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
void | load_packed4 (float64x4 &a, float64x4 &b, float64x4 &c, float64x4 &d, const double *p) |
| Loads 64-bit floating-point values packed in quartets, de-interleaves them and stores the result into four vectors. More...
|
|
|
void | stream (void *p, int128 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
void | stream (void *p, int256 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
void | stream (float *p, float32x4 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
void | stream (float *p, float32x8 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
void | stream (double *p, float64x2 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
void | stream (double *p, float64x4 a) |
| Stores a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory without polluting the caches, if possible. More...
|
|
|
void | store_first (void *p, basic_int8x16 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int8x32 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int16x8 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int16x16 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int32x4 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int32x8 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int64x2 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (void *p, basic_int64x4 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (float *p, float32x4 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (float *p, float32x8 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (double *p, float64x2 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_first (double *p, float64x4 a, unsigned n) |
| Stores the first n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
|
void | store_last (void *p, basic_int8x16 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int8x32 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int16x8 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int16x16 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int32x4 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int32x8 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int64x2 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (void *p, basic_int64x4 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (float *p, float32x4 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (float *p, float32x8 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (double *p, float64x2 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
void | store_last (double *p, float64x4 a, unsigned n) |
| Stores the last n elements of a 128-bit or 256-bit integer, 32-bit or 64-bit floating point vector to memory. More...
|
|
|
void | store_packed2 (void *p, basic_int8x16 a, basic_int8x16 b) |
| Interleaves 8-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (void *p, basic_int8x32 a, basic_int8x32 b) |
| Interleaves 8-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed2 (void *p, basic_int16x8 a, basic_int16x8 b) |
| Interleaves 16-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (void *p, basic_int16x16 a, basic_int16x16 b) |
| Interleaves 16-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed2 (void *p, basic_int32x4 a, basic_int32x4 b) |
| Interleaves 32-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (void *p, basic_int32x8 a, basic_int32x8 b) |
| Interleaves 32-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed2 (void *p, basic_int64x2 a, basic_int64x2 b) |
| Interleaves 64-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (void *p, basic_int64x4 a, basic_int64x4 b) |
| Interleaves 64-bit values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed2 (float *p, float32x4 a, float32x4 b) |
| Interleaves 32-bit floating-point values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (float *p, float32x8 a, float32x8 b) |
| Interleaves 32-bit floating-point values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed2 (double *p, float64x2 a, float64x2 b) |
| Interleaves 64-bit floating-point values from two vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed2 (double *p, float64x4 a, float64x4 b) |
| Interleaves 64-bit floating-point values from two vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (void *p, basic_int8x16 a, basic_int8x16 b, basic_int8x16 c) |
| Interleaves 8-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (void *p, basic_int8x32 a, basic_int8x32 b, basic_int8x32 c) |
| Interleaves 8-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (void *p, basic_int16x8 a, basic_int16x8 b, basic_int16x8 c) |
| Interleaves 16-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (void *p, basic_int16x16 a, basic_int16x16 b, basic_int16x16 c) |
| Interleaves 16-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (void *p, basic_int32x4 a, basic_int32x4 b, basic_int32x4 c) |
| Interleaves 32-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (void *p, basic_int32x8 a, basic_int32x8 b, basic_int32x8 c) |
| Interleaves 32-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (void *p, basic_int64x2 a, basic_int64x2 b, basic_int64x2 c) |
| Interleaves 64-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (void *p, basic_int64x4 a, basic_int64x4 b, basic_int64x4 c) |
| Interleaves 64-bit values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (float *p, float32x4 a, float32x4 b, float32x4 c) |
| Interleaves 32-bit floating-point values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (float *p, float32x8 a, float32x8 b, float32x8 c) |
| Interleaves 32-bit floating-point values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed3 (double *p, float64x2 a, float64x2 b, float64x2 c) |
| Interleaves 64-bit floating-point values from three vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed3 (double *p, float64x4 a, float64x4 b, float64x4 c) |
| Interleaves 64-bit floating-point values from three vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (void *p, basic_int8x16 a, basic_int8x16 b, basic_int8x16 c, basic_int8x16 d) |
| Interleaves 8-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (void *p, basic_int8x32 a, basic_int8x32 b, basic_int8x32 c, basic_int8x32 d) |
| Interleaves 8-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (void *p, basic_int16x8 a, basic_int16x8 b, basic_int16x8 c, basic_int16x8 d) |
| Interleaves 16-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (void *p, basic_int16x16 a, basic_int16x16 b, basic_int16x16 c, basic_int16x16 d) |
| Interleaves 16-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (void *p, basic_int32x4 a, basic_int32x4 b, basic_int32x4 c, basic_int32x4 d) |
| Interleaves 32-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (void *p, basic_int32x8 a, basic_int32x8 b, basic_int32x8 c, basic_int32x8 d) |
| Interleaves 32-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (void *p, basic_int64x2 a, basic_int64x2 b, basic_int64x2 c, basic_int64x2 d) |
| Interleaves 64-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (void *p, basic_int64x4 a, basic_int64x4 b, basic_int64x4 c, basic_int64x4 d) |
| Interleaves 64-bit values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (float *p, float32x4 a, float32x4 b, float32x4 c, float32x4 d) |
| Interleaves 32-bit floating-point values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (float *p, float32x8 a, float32x8 b, float32x8 c, float32x8 d) |
| Interleaves 32-bit floating-point values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
void | store_packed4 (double *p, float64x2 a, float64x2 b, float64x2 c, float64x2 d) |
| Interleaves 64-bit floating-point values from four vectors and stores the result into successive locations starting from p. More...
|
|
void | store_packed4 (double *p, float64x4 a, float64x4 b, float64x4 c, float64x4 d) |
| Interleaves 64-bit floating-point values from four vectors and stores the result into successive locations starting from p. More...
|
|
|
float32x4 | zip_lo (float32x4 a, float32x4 b) |
| Interleaves the lower halves of two vectors. More...
|
|
float32x8 | zip_lo (float32x8 a, float32x8 b) |
| Interleaves the lower halves of two vectors. More...
|
|
float64x2 | zip_lo (float64x2 a, float64x2 b) |
| Interleaves the lower halves of two vectors. More...
|
|
float64x4 | zip_lo (float64x4 a, float64x4 b) |
| Interleaves the lower halves of two vectors. More...
|
|
|
basic_int8x16 | zip_hi (basic_int8x16 a, basic_int8x16 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int8x32 | zip_hi (basic_int8x32 a, basic_int8x32 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int16x8 | zip_hi (basic_int16x8 a, basic_int16x8 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int16x16 | zip_hi (basic_int16x16 a, basic_int16x16 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int32x4 | zip_hi (basic_int32x4 a, basic_int32x4 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int32x8 | zip_hi (basic_int32x8 a, basic_int32x8 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int64x2 | zip_hi (basic_int64x2 a, basic_int64x2 b) |
| Interleaves the higher halves of two vectors. More...
|
|
basic_int64x4 | zip_hi (basic_int64x4 a, basic_int64x4 b) |
| Interleaves the higher halves of two vectors. More...
|
|
|
float32x4 | zip_hi (float32x4 a, float32x4 b) |
| Interleaves the higher halves of two vectors. More...
|
|
float32x8 | zip_hi (float32x8 a, float32x8 b) |
| Interleaves the higher halves of two vectors. More...
|
|
float64x2 | zip_hi (float64x2 a, float64x2 b) |
| Interleaves the higher halves of two vectors. More...
|
|
float64x4 | zip_hi (float64x4 a, float64x4 b) |
| Interleaves the higher halves of two vectors. More...
|
|
|
template<unsigned shift> |
basic_int8x16 | move_l (basic_int8x16 a) |
| Moves the 8-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
basic_int8x32 | move_l (basic_int8x32 a) |
| Moves the 8-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int16x8 | move_l (basic_int16x8 a) |
| Moves the 16-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
basic_int16x16 | move_l (basic_int16x16 a) |
| Moves the 16-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int32x4 | move_l (basic_int32x4 a) |
| Moves the 32-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
basic_int32x8 | move_l (basic_int32x8 a) |
| Moves the 32-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int64x2 | move_l (basic_int64x2 a) |
| Moves the 64-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
basic_int64x4 | move_l (basic_int64x4 a) |
| Moves the 64-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
float32x4 | move_l (float32x4 a) |
| Moves the 32-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
float32x8 | move_l (float32x8 a) |
| Moves the 32-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
float64x2 | move_l (float64x2 a) |
| Moves the 64-bit elements in a vector to the left by shift positions. More...
|
|
template<unsigned shift> |
float64x4 | move_l (float64x4 a) |
| Moves the 64-bit elements in a vector to the left by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int8x16 | move_r (basic_int8x16 a) |
| Moves the 8-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
basic_int8x32 | move_r (basic_int8x32 a) |
| Moves the 8-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int16x8 | move_r (basic_int16x8 a) |
| Moves the 16-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
basic_int16x16 | move_r (basic_int16x16 a) |
| Moves the 16-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int32x4 | move_r (basic_int32x4 a) |
| Moves the 32-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
basic_int32x8 | move_r (basic_int32x8 a) |
| Moves the 32-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned shift> |
basic_int64x2 | move_r (basic_int64x2 a) |
| Moves the 64-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
basic_int64x4 | move_r (basic_int64x4 a) |
| Moves the 64-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned shift> |
float32x4 | move_r (float32x4 a) |
| Moves the 32-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
float32x8 | move_r (float32x8 a) |
| Moves the 32-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned shift> |
float64x2 | move_r (float64x2 a) |
| Moves the 64-bit elements in a vector to the right by shift positions. More...
|
|
template<unsigned shift> |
float64x4 | move_r (float64x4 a) |
| Moves the 64-bit elements in a vector to the right by shift positions. More...
|
|
|
template<unsigned s> |
basic_int8x16 | broadcast (basic_int8x16 a) |
| Broadcasts the specified 8-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
basic_int8x32 | broadcast (basic_int8x32 a) |
| Broadcasts the specified 8-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
basic_int16x8 | broadcast (basic_int16x8 a) |
| Broadcasts the specified 16-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
basic_int16x16 | broadcast (basic_int16x16 a) |
| Broadcasts the specified 16-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
basic_int32x4 | broadcast (basic_int32x4 a) |
| Broadcasts the specified 32-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
basic_int32x8 | broadcast (basic_int32x8 a) |
| Broadcasts the specified 32-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
basic_int64x2 | broadcast (basic_int64x2 a) |
| Broadcasts the specified 64-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
basic_int64x4 | broadcast (basic_int64x4 a) |
| Broadcasts the specified 64-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
float32x4 | broadcast (float32x4 a) |
| Broadcasts the specified 32-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
float32x8 | broadcast (float32x8 a) |
| Broadcasts the specified 32-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
float64x2 | broadcast (float64x2 a) |
| Broadcasts the specified 64-bit value to all elements within 128-bit lanes. More...
|
|
template<unsigned s> |
float64x4 | broadcast (float64x4 a) |
| Broadcasts the specified 64-bit value to all elements within 128-bit lanes. More...
|
|
|
template<unsigned s> |
basic_int8x16 | broadcast_w (basic_int8x16 a) |
| Broadcasts the specified 8-bit value to all elements within a 128-bit lane. More...
|
|
template<unsigned s> |
basic_int8x32 | broadcast_w (basic_int8x32 a) |
| Broadcasts the specified 8-bit value to all elements within a 128-bit lane. More...
|
|
|
template<unsigned s> |
basic_int16x8 | broadcast_w (basic_int16x8 a) |
| Broadcasts the specified 16-bit value to all elements within an int16x8 vector. More...
|
|
template<unsigned s> |
basic_int16x16 | broadcast_w (basic_int16x16 a) |
| Broadcasts the specified 16-bit value to all elements within an int16x8 vector. More...
|
|
|
template<unsigned s> |
basic_int32x4 | broadcast_w (basic_int32x4 a) |
| Broadcasts the specified 32-bit value to all elements within an int32x4 vector. More...
|
|
template<unsigned s> |
basic_int32x8 | broadcast_w (basic_int32x8 a) |
| Broadcasts the specified 32-bit value to all elements within an int32x4 vector. More...
|
|
|
template<unsigned s> |
basic_int64x2 | broadcast_w (basic_int64x2 a) |
| Broadcasts the specified 64-bit value to all elements within an int64x2 vector. More...
|
|
template<unsigned s> |
basic_int64x4 | broadcast_w (basic_int64x4 a) |
| Broadcasts the specified 64-bit value to all elements within an int64x2 vector. More...
|
|
|
template<unsigned s> |
float32x4 | broadcast_w (float32x4 a) |
| Broadcasts the specified 32-bit value to all elements within a float32x4 vector. More...
|
|
template<unsigned s> |
float32x8 | broadcast_w (float32x8 a) |
| Broadcasts the specified 32-bit value to all elements within a float32x4 vector. More...
|
|
|
template<unsigned s> |
float64x2 | broadcast_w (float64x2 a) |
| Broadcasts the specified 64-bit value to all elements within a float64x2 vector. More...
|
|
template<unsigned s> |
float64x4 | broadcast_w (float64x4 a) |
| Broadcasts the specified 64-bit value to all elements within a float64x2 vector. More...
|
|
|
template<unsigned shift> |
basic_int8x16 | align (basic_int8x16 lower, basic_int8x16 upper) |
| Extracts an int8x16 vector from two concatenated int8x16 vectors. More...
|
|
template<unsigned shift> |
basic_int8x32 | align (basic_int8x32 lower, basic_int8x32 upper) |
| Extracts an int8x16 vector from two concatenated int8x16 vectors. More...
|
|
|
template<unsigned shift> |
basic_int16x8 | align (basic_int16x8 lower, basic_int16x8 upper) |
| Extracts an int16x8 vector from two concatenated int16x8 vectors. More...
|
|
template<unsigned shift> |
basic_int16x16 | align (basic_int16x16 lower, basic_int16x16 upper) |
| Extracts an int16x8 vector from two concatenated int16x8 vectors. More...
|
|
|
template<unsigned shift> |
basic_int32x4 | align (basic_int32x4 lower, basic_int32x4 upper) |
| Extracts an int32x4 vector from two concatenated int32x4 vectors. More...
|
|
template<unsigned shift> |
basic_int32x8 | align (basic_int32x8 lower, basic_int32x8 upper) |
| Extracts an int32x4 vector from two concatenated int32x4 vectors. More...
|
|
|
template<unsigned shift> |
basic_int64x2 | align (basic_int64x2 lower, basic_int64x2 upper) |
| Extracts an int64x2 vector from two concatenated int64x2 vectors. More...
|
|
template<unsigned shift> |
basic_int64x4 | align (basic_int64x4 lower, basic_int64x4 upper) |
| Extracts an int64x2 vector from two concatenated int64x2 vectors. More...
|
|
|
template<unsigned shift> |
float32x4 | align (float32x4 lower, float32x4 upper) |
| Extracts a float32x4 vector from two concatenated float32x4 vectors. More...
|
|
template<unsigned shift> |
float32x8 | align (float32x8 lower, float32x8 upper) |
| Extracts a float32x4 vector from two concatenated float32x4 vectors. More...
|
|
|
template<unsigned shift> |
float64x2 | align (float64x2 lower, float64x2 upper) |
| Extracts a float64x2 vector from two concatenated float64x2 vectors. More...
|
|
template<unsigned shift> |
float64x4 | align (float64x4 lower, float64x4 upper) |
| Extracts a float64x2 vector from two concatenated float64x2 vectors. More...
|
|
|
basic_int8x16 | blend (basic_int8x16 on, basic_int8x16 off, basic_int8x16 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int8x16 | blend (basic_int8x16 on, basic_int8x16 off, mask_int8x16 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int8x32 | blend (basic_int8x32 on, basic_int8x32 off, basic_int8x32 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int8x32 | blend (basic_int8x32 on, basic_int8x32 off, mask_int8x32 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
basic_int16x8 | blend (basic_int16x8 on, basic_int16x8 off, basic_int16x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int16x16 | blend (basic_int16x16 on, basic_int16x16 off, basic_int16x16 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int16x8 | blend (basic_int16x8 on, basic_int16x8 off, mask_int16x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int16x16 | blend (basic_int16x16 on, basic_int16x16 off, mask_int16x16 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
basic_int32x4 | blend (basic_int32x4 on, basic_int32x4 off, basic_int32x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int32x8 | blend (basic_int32x8 on, basic_int32x8 off, basic_int32x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int32x4 | blend (basic_int32x4 on, basic_int32x4 off, mask_int32x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int32x8 | blend (basic_int32x8 on, basic_int32x8 off, mask_int32x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
basic_int64x2 | blend (basic_int64x2 on, basic_int64x2 off, basic_int64x2 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int64x4 | blend (basic_int64x4 on, basic_int64x4 off, basic_int64x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int64x2 | blend (basic_int64x2 on, basic_int64x2 off, mask_int64x2 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
basic_int64x4 | blend (basic_int64x4 on, basic_int64x4 off, mask_int64x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
float32x4 | blend (float32x4 on, float32x4 off, float32x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float32x4 | blend (float32x4 on, float32x4 off, int128 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float32x8 | blend (float32x8 on, float32x8 off, float32x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float32x8 | blend (float32x8 on, float32x8 off, int256 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float32x4 | blend (float32x4 on, float32x4 off, mask_float32x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float32x8 | blend (float32x8 on, float32x8 off, mask_float32x8 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
float64x2 | blend (float64x2 on, float64x2 off, float64x2 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float64x2 | blend (float64x2 on, float64x2 off, int128 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float64x4 | blend (float64x4 on, float64x4 off, float64x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float64x4 | blend (float64x4 on, float64x4 off, int256 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float64x2 | blend (float64x2 on, float64x2 off, mask_float64x2 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
float64x4 | blend (float64x4 on, float64x4 off, mask_float64x4 mask) |
| Composes a vector from two sources according to a mask. More...
|
|
|
basic_int8x16 | unzip_lo (basic_int8x16 a, basic_int8x16 b) |
| De-interleaves the odd (lower) elements of two int8x16 vectors. More...
|
|
basic_int8x32 | unzip_lo (basic_int8x32 a, basic_int8x32 b) |
| De-interleaves the odd (lower) elements of two int8x16 vectors. More...
|
|
|
basic_int16x8 | unzip_lo (basic_int16x8 a, basic_int16x8 b) |
| De-interleaves the odd (lower) elements of two int16x8 vectors. More...
|
|
basic_int16x16 | unzip_lo (basic_int16x16 a, basic_int16x16 b) |
| De-interleaves the odd (lower) elements of two int16x8 vectors. More...
|
|
|
basic_int32x4 | unzip_lo (basic_int32x4 a, basic_int32x4 b) |
| De-interleaves the odd (lower) elements of two int32x4 vectors. More...
|
|
basic_int32x8 | unzip_lo (basic_int32x8 a, basic_int32x8 b) |
| De-interleaves the odd (lower) elements of two int32x4 vectors. More...
|
|
|
basic_int64x2 | unzip_lo (basic_int64x2 a, basic_int64x2 b) |
| De-interleaves the odd (lower) elements of two int64x2 vectors. More...
|
|
basic_int64x4 | unzip_lo (basic_int64x4 a, basic_int64x4 b) |
| De-interleaves the odd (lower) elements of two int64x2 vectors. More...
|
|
|
float32x4 | unzip_lo (float32x4 a, float32x4 b) |
| De-interleaves the odd (lower) elements of two float32x4 vectors. More...
|
|
float32x8 | unzip_lo (float32x8 a, float32x8 b) |
| De-interleaves the odd (lower) elements of two float32x4 vectors. More...
|
|
|
float64x2 | unzip_lo (float64x2 a, float64x2 b) |
| De-interleaves the odd (lower) elements of two float64x2 vectors. More...
|
|
float64x4 | unzip_lo (float64x4 a, float64x4 b) |
| De-interleaves the odd (lower) elements of two float64x2 vectors. More...
|
|
|
basic_int8x16 | unzip_hi (basic_int8x16 a, basic_int8x16 b) |
| De-interleaves the even (higher) elements of two int8x16 vectors. More...
|
|
basic_int8x32 | unzip_hi (basic_int8x32 a, basic_int8x32 b) |
| De-interleaves the even (higher) elements of two int8x16 vectors. More...
|
|
|
basic_int16x8 | unzip_hi (basic_int16x8 a, basic_int16x8 b) |
| De-interleaves the even (higher) elements of two int16x8 vectors. More...
|
|
basic_int16x16 | unzip_hi (basic_int16x16 a, basic_int16x16 b) |
| De-interleaves the even (higher) elements of two int16x8 vectors. More...
|
|
|
basic_int32x4 | unzip_hi (basic_int32x4 a, basic_int32x4 b) |
| De-interleaves the even (higher) elements of two int32x4 vectors. More...
|
|
basic_int32x8 | unzip_hi (basic_int32x8 a, basic_int32x8 b) |
| De-interleaves the even (higher) elements of two int32x4 vectors. More...
|
|
|
basic_int64x2 | unzip_hi (basic_int64x2 a, basic_int64x2 b) |
| De-interleaves the even (higher) elements of two int64x2 vectors. More...
|
|
basic_int64x4 | unzip_hi (basic_int64x4 a, basic_int64x4 b) |
| De-interleaves the even (higher) elements of two int64x2 vectors. More...
|
|
|
float32x4 | unzip_hi (float32x4 a, float32x4 b) |
| De-interleaves the even (higher) elements of two float32x4 vectors. More...
|
|
float32x8 | unzip_hi (float32x8 a, float32x8 b) |
| De-interleaves the even (higher) elements of two float32x4 vectors. More...
|
|
|
float64x2 | unzip_hi (float64x2 a, float64x2 b) |
| De-interleaves the even (higher) elements of two float64x2 vectors. More...
|
|
float64x4 | unzip_hi (float64x4 a, float64x4 b) |
| De-interleaves the even (higher) elements of two float64x2 vectors. More...
|
|
|
int128 | permute_bytes16 (int128 a, int128 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
float32x4 | permute_bytes16 (float32x4 a, int128 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
float64x2 | permute_bytes16 (float64x2 a, int128 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
int256 | permute_bytes16 (int256 a, int256 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
float32x8 | permute_bytes16 (float32x8 a, int256 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
float64x4 | permute_bytes16 (float64x4 a, int256 mask) |
| Selects bytes from a vector according to a mask. More...
|
|
|
int128 | shuffle_bytes16 (int128 a, int128 b, int128 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
float32x4 | shuffle_bytes16 (float32x4 a, float32x4 b, int128 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
float64x2 | shuffle_bytes16 (float64x2 a, float64x2 b, int128 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
int256 | shuffle_bytes16 (int256 a, int256 b, int256 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
float32x8 | shuffle_bytes16 (float32x8 a, float32x8 b, int256 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
float64x4 | shuffle_bytes16 (float64x4 a, float64x4 b, int256 mask) |
| Selects bytes from two vectors according to a mask. More...
|
|
|
int128 | permute_zbytes16 (int128 a, int128 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
float32x4 | permute_zbytes16 (float32x4 a, int128 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
float64x2 | permute_zbytes16 (float64x2 a, int128 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
int256 | permute_zbytes16 (int256 a, int256 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
float32x8 | permute_zbytes16 (float32x8 a, int256 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
float64x4 | permute_zbytes16 (float64x4 a, int256 mask) |
| Selects bytes from a vector according to a mask, optionally selecting zero. More...
|
|
|
int128 | shuffle_zbytes16 (int128 a, int128 b, int128 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
float32x4 | shuffle_zbytes16 (float32x4 a, float32x4 b, int128 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
float64x2 | shuffle_zbytes16 (float64x2 a, float64x2 b, int128 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
int256 | shuffle_zbytes16 (int256 a, int256 b, int256 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
float32x8 | shuffle_zbytes16 (float32x8 a, float32x8 b, int256 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
float64x4 | shuffle_zbytes16 (float64x4 a, float64x4 b, int256 mask) |
| Selects bytes from two vectors according to a mask, optionally selecting zero. More...
|
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
int128 | permute (basic_int16x8 a) |
| Permutes the 16-bit values within each 4 consecutive values of the vector. More...
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
basic_int16x16 | permute (basic_int16x16 a) |
| Permutes the 16-bit values within each 4 consecutive values of the vector. More...
|
|
|
template<unsigned s0, unsigned s1> |
basic_int16x8 | permute (basic_int16x8 a) |
| Permutes the 16-bit values within sets of two consecutive elements of the vector. More...
|
|
template<unsigned s0, unsigned s1> |
basic_int16x16 | permute (basic_int16x16 a) |
| Permutes the 16-bit values within sets of two consecutive elements of the vector. More...
|
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
basic_int32x4 | permute (basic_int32x4 a) |
| Permutes the values of each set of four consecutive 32-bit values. More...
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
basic_int32x8 | permute (basic_int32x8 a) |
| Permutes the values of each set of four consecutive 32-bit values. More...
|
|
|
template<unsigned s0, unsigned s1> |
basic_int32x4 | permute (basic_int32x4 a) |
| Permutes the values of each set of two consecutive 32-bit values. More...
|
|
template<unsigned s0, unsigned s1> |
basic_int32x8 | permute (basic_int32x8 a) |
| Permutes the values of each set of two consecutive 32-bit values. More...
|
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
float32x4 | permute (float32x4 a) |
| Permutes the values of each set of four consecutive 32-bit floating-point values. More...
|
|
template<unsigned s0, unsigned s1, unsigned s2, unsigned s3> |
float32x8 | permute (float32x8 a) |
| Permutes the values of each set of four consecutive 32-bit floating-point values. More...
|
|
|
template<unsigned s0, unsigned s1> |
float32x4 | permute (float32x4 a) |
| Permutes the values of each set of two consecutive 32-bit floating-point values. More...
|
|
template<unsigned s0, unsigned s1> |
float32x8 | permute (float32x8 a) |
| Permutes the values of each set of two consecutive 32-bit floating-point values. More...
|
|
|
template<unsigned s0, unsigned s1> |
basic_int64x2 | permute (basic_int64x2 a) |
| Permutes the values of each set of two consecutive 64-bit values. More...
|
|
template<unsigned s0, unsigned s1> |
basic_int64x4 | permute (basic_int64x4 a) |
| Permutes the values of each set of two consecutive 64-bit values. More...
|
|
|
template<unsigned s0, unsigned s1> |
float64x2 | permute (float64x2 a) |
| Permutes the values of each set of two consecutive 64-bit floating-point values. More...
|
|
template<unsigned s0, unsigned s1> |
float64x4 | permute (float64x4 a) |
| Permutes the values of each set of two consecutive 64-bit floating-point values. More...
|
|
|
template<unsigned s0, unsigned s1> |
float64x2 | shuffle1 (float64x2 a, float64x2 b) |
| Selects 64-bit floating-point values from two vectors. More...
|
|
template<unsigned s0, unsigned s1> |
float64x4 | shuffle1 (float64x4 a, float64x4 b) |
| Selects 64-bit floating-point values from two vectors. More...
|
|
|
template<unsigned s0, unsigned s1> |
basic_int64x2 | shuffle1 (basic_int64x2 a, basic_int64x2 b) |
| Selects 64-bit values from two vectors. More...
|
|
template<unsigned s0, unsigned s1> |
basic_int64x4 | shuffle1 (basic_int64x4 a, basic_int64x4 b) |
| Selects 64-bit values from two vectors. More...
|
|
|
template<unsigned a0, unsigned a1, unsigned b0, unsigned b1> |
float32x4 | shuffle2 (float32x4 a, float32x4 b) |
| Selects 32-bit floating-point values from two vectors. More...
|
|
template<unsigned a0, unsigned a1, unsigned b0, unsigned b1> |
float32x8 | shuffle2 (float32x8 a, float32x8 b) |
| Selects 32-bit floating-point values from two vectors. More...
|
|
|
template<unsigned s0, unsigned s1> |
float32x4 | shuffle2 (float32x4 a, float32x4 b) |
| Selects 32-bit values from two vectors. More...
|
|
template<unsigned s0, unsigned s1> |
float32x8 | shuffle2 (float32x8 a, float32x8 b) |
| Selects 32-bit values from two vectors. More...
|
|
|
template<unsigned a0, unsigned a1, unsigned b0, unsigned b1> |
basic_int32x4 | shuffle2 (basic_int32x4 a, basic_int32x4 b) |
| Selects 32-bit values from two vectors. More...
|
|
template<unsigned a0, unsigned a1, unsigned b0, unsigned b1> |
basic_int32x8 | shuffle2 (basic_int32x8 a, basic_int32x8 b) |
| Selects 32-bit values from two vectors. More...
|
|
|
template<unsigned s0, unsigned s1> |
basic_int32x4 | shuffle2 (basic_int32x4 a, basic_int32x4 b) |
| Selects 32-bit values from two vectors. More...
|
|
template<unsigned s0, unsigned s1> |
basic_int32x8 | shuffle2 (basic_int32x8 a, basic_int32x8 b) |
| Selects 32-bit values from two vectors. More...
|
|
|
void | transpose2 (basic_int32x4 &a0, basic_int32x4 &a1) |
| Transposes two 2x2 32-bit matrices within two int32x4 vectors. More...
|
|
void | transpose2 (basic_int32x8 &a0, basic_int32x8 &a1) |
| Transposes two 2x2 32-bit matrices within two int32x4 vectors. More...
|
|
|
void | transpose2 (basic_int64x2 &a0, basic_int64x2 &a1) |
| Transposes a 2x2 64-bit matrix within two int64x2 vectors. More...
|
|
void | transpose2 (basic_int64x4 &a0, basic_int64x4 &a1) |
| Transposes a 2x2 64-bit matrix within two int64x2 vectors. More...
|
|
|
void | transpose2 (float32x4 &a0, float32x4 &a1) |
| Transposes two 2x2 32-bit matrices within two float32x4 vectors. More...
|
|
void | transpose2 (float32x8 &a0, float32x8 &a1) |
| Transposes two 2x2 32-bit matrices within two float32x4 vectors. More...
|
|
|
void | transpose2 (float64x2 &a0, float64x2 &a1) |
| Transposes a 2x2 64-bit matrix within two float64x2 vectors. More...
|
|
void | transpose2 (float64x4 &a0, float64x4 &a1) |
| Transposes a 2x2 64-bit matrix within two float64x2 vectors. More...
|
|
|
void | transpose4 (basic_int32x4 &a0, basic_int32x4 &a1, basic_int32x4 &a2, basic_int32x4 &a3) |
| Transposes a 4x4 32-bit matrix within four int32x4 vectors. More...
|
|
|
void | transpose4 (basic_int8x16 &a0, basic_int8x16 &a1, basic_int8x16 &a2, basic_int8x16 &a3) |
| Transposes four 4x4 8-bit matrices within four int8x16 vectors. More...
|
|
void | transpose4 (basic_int32x8 &a0, basic_int32x8 &a1, basic_int32x8 &a2, basic_int32x8 &a3) |
| Transposes a 4x4 32-bit matrix within four int32x4 vectors. More...
|
|
void | transpose4 (basic_int8x32 &a0, basic_int8x32 &a1, basic_int8x32 &a2, basic_int8x32 &a3) |
| Transposes four 4x4 8-bit matrices within four int8x16 vectors. More...
|
|
|
void | transpose4 (basic_int16x8 &a0, basic_int16x8 &a1, basic_int16x8 &a2, basic_int16x8 &a3) |
| Transposes two 4x4 16-bit matrices within four int16x8 vectors. More...
|
|
void | transpose4 (basic_int16x16 &a0, basic_int16x16 &a1, basic_int16x16 &a2, basic_int16x16 &a3) |
| Transposes two 4x4 16-bit matrices within four int16x8 vectors. More...
|
|
|
void | transpose4 (float32x4 &a0, float32x4 &a1, float32x4 &a2, float32x4 &a3) |
| Transposes a 4x4 32-bit matrix within four float32x4 vectors. More...
|
|
void | transpose4 (float32x8 &a0, float32x8 &a1, float32x8 &a2, float32x8 &a3) |
| Transposes a 4x4 32-bit matrix within four float32x4 vectors. More...
|
|