|
| 1 | +// Compile with C not C++, as G++ misses some vector operators in C++ mode |
| 2 | +// cc -mavx -O3 -c -o simd_capabilities.o simd_capabilities.c |
| 3 | + |
| 4 | +// Peek into machine code |
| 5 | +// objdump -d simd_capabilities.o |
| 6 | + |
| 7 | +// NOTES: |
| 8 | +// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-July/051470.html |
| 9 | + |
| 10 | +#include <stdio.h> |
| 11 | +#include <stdint.h> |
| 12 | +#include <immintrin.h> |
| 13 | + |
/* Number of lanes in the vector types built from this constant. */
#define SIMD 8

/*
 * simd_int_t(s) / simd_uint_t(s): spell the fixed-width integer type that is
 * s bytes (s*8 bits) wide, e.g. simd_int_t(4) is int32_t.
 *
 * The preprocessor cannot evaluate s*8 while pasting tokens, and pasting
 * `int` with `(` does not form a valid token, so the width is selected from
 * a small lookup table instead.  s must be, or expand to, one of the
 * literals 1, 2, 4 or 8.
 */
#define simd_cat2(a, b) a ## b
#define simd_cat(a, b) simd_cat2(a, b) /* extra level so arguments expand before pasting */

#define simd_int_t_1 int8_t
#define simd_int_t_2 int16_t
#define simd_int_t_4 int32_t
#define simd_int_t_8 int64_t

#define simd_uint_t_1 uint8_t
#define simd_uint_t_2 uint16_t
#define simd_uint_t_4 uint32_t
#define simd_uint_t_8 uint64_t

#define simd_int_t(s) simd_cat(simd_int_t_, s)
#define simd_uint_t(s) simd_cat(simd_uint_t_, s)
| 18 | + |
/*
 * Scalar element types used for the vector lanes: one floating-point type
 * and a signed/unsigned pair of 32-bit integers.
 *
 * NOTE(review): <math.h> also declares a float_t; this file does not include
 * it, but adding that include later could clash -- confirm before doing so.
 */
typedef uint32_t uint_t;
typedef int32_t  int_t;
typedef float    float_t;
| 22 | + |
/*
 * Repetition helpers: dupN(x) expands to x written N times, separated by
 * commas.  Each size is built by doubling the previous one.
 */
#define dup1(x)  x
#define dup2(x)  x, x
#define dup4(x)  dup2(x), dup2(x)
#define dup8(x)  dup4(x), dup4(x)
#define dup16(x) dup8(x), dup8(x)
#define dup32(x) dup16(x), dup16(x)
#define dup64(x) dup32(x), dup32(x)

/*
 * dup(x, num) forwards to the dupN macro named by num.  Going through dupn()
 * lets num be another macro: it is expanded to its value before being pasted
 * onto "dup".  num must resolve to 1, 2, 4, 8, 16, 32 or 64.
 */
#define dupn(num) dup ## num
#define dup(x, num) dupn(num)(x)
| 32 | + |
| 33 | +typedef float_t fvec_t __attribute__((vector_size((SIMD)*sizeof(float_t)))); |
| 34 | +typedef int_t ivec_t __attribute__((vector_size((SIMD)*sizeof(int_t)))); |
| 35 | +typedef uint_t uvec_t __attribute__((vector_size((SIMD)*sizeof(uint_t)))); |
| 36 | +#define fvec_t(x) (fvec_t){ dup((float_t)x, SIMD) } |
| 37 | +#define uvec_t(x) (uvec_t){ dup( (uint_t)x, SIMD) } |
| 38 | +#define float_t(x, c) (((float_t *)&x)[c]) |
| 39 | +#define int_t(x, c) (((int_t *)&x)[c]) |
| 40 | +#define uint_t(x, c) (((uint_t *)&x)[c]) |
| 41 | + |
| 42 | +fvec_t faddps(fvec_t a, fvec_t b) { return a + b; } |
| 43 | +fvec_t fmulps(fvec_t a, fvec_t b) { return a * b; } |
| 44 | +fvec_t fmadps(fvec_t a, fvec_t b, fvec_t c) { return c + a * b; } |
| 45 | +#ifndef __ICC |
| 46 | +uvec_t fcmpps(fvec_t a, fvec_t b) { return (a < b); } |
| 47 | +uvec_t ucmpps(fvec_t a, fvec_t b) { return (a < b); } |
| 48 | +uvec_t uxorps(uvec_t a, uvec_t b) { return a ^ b; } |
| 49 | +uvec_t ushlps(uvec_t a, uint_t s) { |
| 50 | +#if SIMD == 8 |
| 51 | +#else |
| 52 | + return (a << uvec_t(s)); |
| 53 | +#endif |
| 54 | +} |
| 55 | +fvec_t fminps(fvec_t a, fvec_t b) { |
| 56 | + uvec_t c = (a < b); |
| 57 | + return (fvec_t)(((uvec_t)a & c) | ((uvec_t)b & ~c)); |
| 58 | +} |
| 59 | +#endif |
| 60 | + |
#if 0
/*
 * Disabled demo: lane-wise minimum of two integer vectors, printing the
 * first four lanes (expected output: "1 2 1 0").
 *
 * No min() for vectors is defined in this file, so the minimum is selected
 * with a comparison mask, the same way fminps does for floats.
 */
int main (int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    /* Lanes beyond the four listed initializers are zero-initialized. */
    ivec_t a = {1, 2, 3, 4};
    ivec_t b = {3, 2, 1, 0};

    ivec_t lt = (a < b);              /* all ones where a < b, else zero */
    ivec_t m = (a & lt) | (b & ~lt);  /* a where a < b, otherwise b */

    printf("%d %d %d %d\n", int_t(m, 0), int_t(m, 1), int_t(m, 2), int_t(m, 3));
    return 0;
}
#endif
0 commit comments