-
Notifications
You must be signed in to change notification settings - Fork 0
/
avx_array.hpp
152 lines (115 loc) · 3 KB
/
avx_array.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#ifndef AVX_ARRAY_HPP_INCLUDED
#define AVX_ARRAY_HPP_INCLUDED
#ifdef _WIN32
#include <malloc.h>
#endif
#include <cassert>
#include <cstddef>
#include <immintrin.h>
namespace expression_template_simd
{
// Lane-wise addition of two packed single-precision vectors:
// every lane of the result holds lhs[k] + rhs[k].
INLINE __m256 add(const __m256& lhs, const __m256& rhs)
{
    const __m256 sum = _mm256_add_ps(lhs, rhs);
    return sum;
}
// Lane-wise multiplication of two packed single-precision vectors:
// every lane of the result holds lhs[k] * rhs[k].
INLINE __m256 mul(const __m256& lhs, const __m256& rhs)
{
    const __m256 product = _mm256_mul_ps(lhs, rhs);
    return product;
}
// Lane-wise multiply-add: every lane of the result holds a[k] + b[k] * c[k].
// The product and the sum are issued as two separate intrinsics (a multiply
// followed by an add), not as a single fused instruction.
INLINE __m256 madd(const __m256& a, const __m256& b, const __m256& c)
{
    const __m256 product = _mm256_mul_ps(b, c);
    return _mm256_add_ps(a, product);
}
// Lane-wise square root of a packed single-precision vector:
// every lane of the result holds sqrt(v[k]).
INLINE __m256 square_root(const __m256& v)
{
    const __m256 roots = _mm256_sqrt_ps(v);
    return roots;
}
// Returns the float stored in lane `i` of `value`.
//
// `i` must be smaller than the number of floats in a __m256 (checked with
// assert in debug builds).
//
// The vector is spilled to a local float array and indexed there instead of
// going through the `m256_f32` member: that member only exists in MSVC's
// union-based definition of __m256 and does not compile with GCC or Clang,
// where __m256 is a builtin vector type.
INLINE float get(const __m256& value, std::size_t i)
{
    float lanes[sizeof(__m256) / sizeof(float)];
    assert(i < sizeof(lanes) / sizeof(lanes[0]));
    // Unaligned store: the local array carries no 32-byte alignment guarantee.
    _mm256_storeu_ps(lanes, value);
    return lanes[i];
}
// Primary template of the AVX-backed array representation, parameterized on
// the scalar type. It is only declared here, never defined; this header
// provides the explicit specialization for float that follows.
template <typename Real>
class valarray_rep_avx;
// AVX-backed storage for an array of floats.
//
// The size() logical floats are packed into elements() __m256 vectors of
// element_size() floats each, held in a buffer obtained from _mm_malloc with
// alignment() bytes of alignment and released with _mm_free. When size() is
// not a multiple of element_size(), the last vector contains padding lanes
// beyond the logical end of the array.
template <>
class valarray_rep_avx<float>
{
public:
    typedef float value_type;
    typedef __m256 element_type;

    // Allocates storage for `size` floats. The contents are NOT initialized.
    INLINE valarray_rep_avx(std::size_t size)
        : _size(size)
        , _elements(elements_for(size))
        , _values(allocate(_elements))
    {
    }

    // Allocates storage for `size` floats and sets every lane of every
    // vector (padding lanes included) to `value`.
    INLINE valarray_rep_avx(std::size_t size, value_type value)
        : _size(size)
        , _elements(elements_for(size))
        , _values(allocate(_elements))
    {
        const __m256 fill = _mm256_set1_ps(value);
        for (std::size_t i = 0; i < _elements; ++i)
            _values[i] = fill;
    }

    // Releases the vector buffer.
    INLINE ~valarray_rep_avx()
    {
        _mm_free(_values);
    }

    // Deep copy: allocates a buffer of the same number of vectors as `copy`
    // and duplicates each vector.
    INLINE valarray_rep_avx(const valarray_rep_avx& copy)
        : _size(copy._size)
        , _elements(copy._elements)
        // The byte count must be vectors * sizeof(vector); requesting just
        // `_elements` bytes would make the copy below overrun the buffer.
        , _values(allocate(copy._elements))
    {
        copy_vectors(copy);
    }

    // Deep copy assignment. If `copy` holds a different number of vectors,
    // the buffer is reallocated first, so the destination can never be
    // overrun (nor the source over-read) when the sizes differ.
    // Returns *this.
    INLINE valarray_rep_avx& operator= (const valarray_rep_avx& copy)
    {
        if (this == &copy)
            return *this;

        if (_elements != copy._elements)
        {
            // Acquire the new buffer before giving up the old one.
            element_type* const fresh = allocate(copy._elements);
            _mm_free(_values);
            _values = fresh;
            _elements = copy._elements;
        }
        _size = copy._size;
        copy_vectors(copy);
        return *this;
    }

    // Returns vector `i` by value. Requires i < elements().
    INLINE element_type operator() (std::size_t i) const
    {
        assert(i < _elements);
        return _values[i];
    }

    // Returns a mutable reference to vector `i`. Requires i < elements().
    INLINE element_type& operator() (std::size_t i)
    {
        assert(i < _elements);
        return _values[i];
    }

    // Returns the float at logical index `i`. Requires i < size().
    INLINE float operator[] (std::size_t i) const
    {
        assert(i < _size);
        const std::size_t element = i / element_size(); // which vector
        const std::size_t index = i % element_size();   // which lane in it
        return get(_values[element], index);
    }

    // Number of logical floats in the array.
    INLINE std::size_t size() const
    {
        return _size;
    }

    // Number of __m256 vectors backing the array.
    INLINE std::size_t elements() const
    {
        return _elements;
    }

    // Byte alignment requested for the buffer: the size of one vector.
    INLINE static std::size_t alignment()
    {
        return sizeof(element_type);
    }

    // Number of floats held by one vector.
    INLINE static std::size_t element_size()
    {
        return sizeof(element_type) / sizeof(value_type);
    }

    // Despite its name this does not exchange anything: it copies every
    // vector of `copy` into this object and leaves `copy` untouched.
    // Requires both objects to have the same size().
    INLINE void swap(const valarray_rep_avx& copy)
    {
        assert(_size == copy._size);
        copy_vectors(copy);
    }

private:
    // Number of vectors needed to hold `size` floats (rounded up).
    INLINE static std::size_t elements_for(std::size_t size)
    {
        return (size / element_size()) + ((size % element_size() == 0) ? 0 : 1);
    }

    // Obtains an aligned buffer of `elements` vectors from _mm_malloc.
    INLINE static element_type* allocate(std::size_t elements)
    {
        void* const block = _mm_malloc(elements * sizeof(element_type), alignment());
        // Null is tolerated only for an empty buffer, which is never
        // dereferenced; anything else is a failed allocation.
        assert(block != 0 || elements == 0);
        return static_cast<element_type*>(block);
    }

    // Copies _elements vectors from `source`; the caller guarantees that
    // both buffers hold at least that many vectors.
    INLINE void copy_vectors(const valarray_rep_avx& source)
    {
        for (std::size_t i = 0; i < _elements; ++i)
            _values[i] = source._values[i];
    }

    std::size_t _size;      // logical number of floats
    std::size_t _elements;  // number of vectors in _values
    element_type* _values;  // buffer owned by this object (_mm_malloc/_mm_free)
} ; // end class valarray_rep_avx<float>
} // end namespace expression_template_simd
#endif // end AVX_ARRAY_HPP_INCLUDED