#include <cstddef>
#include <iomanip>
#include <iostream>

#include <x86intrin.h>

/// Prints each input lane next to its squared result, one pair per line.
///
/// @tparam N   number of float lanes in both arrays (deduced from the arguments).
/// @param in   values that were loaded into the vector register.
/// @param out  values stored back from the vector register after the multiply.
template <std::size_t N>
static void print_lanes(const float (&in)[N], const float (&out)[N])
{
  for (std::size_t i = 0; i != N; ++i)
    std::cout << " Q[" << i << "]=" << std::setw(6) << in[i]
              << " Z[" << i << "]=" << std::setw(6) << out[i] << '\n';
}

/// Smoke test for SIMD code generation: squares a small float vector with
/// SSE (4 lanes) and, when available, AVX (8 lanes) intrinsics and prints
/// every input/result pair.
///
/// Each section is compiled only when the corresponding feature macro
/// (__SSE__ / __AVX__) is defined by the compiler. When __AVX__ is missing a
/// compile-time warning is emitted, because (per that warning) the AVX
/// section is the part intended to exercise the cctools assembler.
///
/// @return implicitly 0.
int main()
{
#ifdef __SSE__
  {
    // _mm_load_ps / _mm_store_ps are the aligned variants and need 16-byte
    // aligned addresses, hence the explicit alignment on both arrays.
    float __attribute__ ((aligned(16))) Q[4]={1.f,2.f,3.f,4.f};
    float __attribute__ ((aligned(16))) Z[4];
    const __m128 v4Q = _mm_load_ps(Q);
    const __m128 v4X = _mm_mul_ps(v4Q,v4Q);   // lane-wise Q*Q
    _mm_store_ps(Z,v4X);
    print_lanes(Q,Z);
  }
#endif
#ifdef __AVX__
  {
    // _mm256_load_ps / _mm256_store_ps need 32-byte aligned addresses.
    float __attribute__ ((aligned(32))) Q[8]={1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f};
    float __attribute__ ((aligned(32))) Z[8];
    const __m256 v8Q = _mm256_load_ps(Q);
    const __m256 v8X = _mm256_mul_ps(v8Q,v8Q);   // lane-wise Q*Q
    _mm256_store_ps(Z,v8X);
    print_lanes(Q,Z);
  }
#else
# warning __AVX__ not #defined: this will not test cctools as
# warning did you compile with -march=native?
# warning if so, does your computer support AVX instructions?
# warning (it will if "sysctl machdep.cpu.features" has "AVX1.0")
#endif
}