-
Notifications
You must be signed in to change notification settings - Fork 3
/
test-kernels.cpp
140 lines (96 loc) · 3.54 KB
/
test-kernels.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Build with: g++ -Wall -O3 -march=native -o test-kernels test-kernels.cpp
#include <vector>
#include <cstdlib>
#include <cassert>
#include <iostream>
#include "ch_vdif_assembler_kernels.hpp"
using namespace std;
using namespace ch_vdif_assembler;
// Returns one pseudo-random byte derived from a single call to rand().
//
// The raw rand() value is offset by one half and divided by (RAND_MAX + 1.0),
// giving a double strictly between 0 and 1.  Scaling by 256 and truncating
// then produces a value in the range 0..255.
static uint8_t rand8()
{
    const double numer = rand() + 0.5;
    const double denom = RAND_MAX + 1.0;
    const double unit = numer / denom;

    return static_cast<uint8_t>(256 * unit);
}
// Builds a vector of 'nelts' bytes, filling each element (front to back)
// with its own call to rand8().  Asserts that nelts is strictly positive.
static vector<uint8_t> randvec(int nelts)
{
    assert(nelts > 0);

    std::vector<uint8_t> bytes(nelts);

    std::vector<uint8_t>::iterator p = bytes.begin();
    while (p != bytes.end()) {
        *p = rand8();
        ++p;
    }

    return bytes;
}
// -------------------------------------------------------------------------------------------------
// Cross-checks _assemble128() against _assemble8() for one parameter combination.
//
// Both kernels are run on identical copies of randomly filled source and
// destination buffers, and the two resulting destination buffers are compared
// byte by byte.  On the first mismatch, the parameters and a window of the
// surrounding bytes (initial contents plus both results) are written to stderr
// and the process exits with status 1.  If the buffers agree, returns silently.
//
//   stride      forwarded to both kernels; asserted to be >= n
//   n           forwarded to both kernels
//   dst_offset  index into the destination buffer where the kernels are pointed
//   src_offset  index into the source buffer where the kernels are pointed
static void test_assemble128(int stride, int n, int dst_offset, int src_offset)
{
    assert(stride >= n);

    // The "+1" keeps the source buffer non-empty, so that the n==0 case can be tested.
    const int src_len = src_offset + 8*n + 1;
    // The "+64" is slack after the data, to test for writing past end of buffer.
    const int dst_len = dst_offset + 7*stride + n + 64;

    // Random initial contents: destination is generated first, then source.
    const std::vector<uint8_t> dst_init = randvec(dst_len);
    std::vector<uint8_t> src_a = randvec(src_len);

    // Each kernel gets its own private copy of both buffers.
    std::vector<uint8_t> dst_a = dst_init;
    std::vector<uint8_t> dst_b = dst_init;
    std::vector<uint8_t> src_b = src_a;

    _assemble8(&dst_a[dst_offset], stride, &src_a[src_offset], n);
    _assemble128(&dst_b[dst_offset], stride, &src_b[src_offset], n);

    // Locate the first byte where the two outputs disagree.
    int bad = 0;
    while ((bad < dst_len) && (dst_a[bad] == dst_b[bad]))
        bad++;

    if (bad >= dst_len)
        return;   // outputs agree everywhere

    std::cerr << "\ntest_assemble128() failed\n";
    std::cerr << " stride=" << stride << ", n=" << n
              << ", dst_offset=" << dst_offset << ", src_offset=" << src_offset << "\n";

    // Show up to 8 bytes on either side of the failure, clipped to the buffer.
    const int lo = (bad - 8 > 0) ? (bad - 8) : 0;
    const int hi = (bad + 9 < dst_len) ? (bad + 9) : dst_len;

    for (int k = lo; k < hi; k++) {
        std::cerr << " i=" << k;
        std::cerr << ", dst0=" << std::hex << static_cast<unsigned int>(dst_init[k]) << std::dec;
        std::cerr << ", dst1=" << std::hex << static_cast<unsigned int>(dst_a[k]) << std::dec;
        std::cerr << ", dst2=" << std::hex << static_cast<unsigned int>(dst_b[k]) << std::dec;

        if (k == bad)
            std::cerr << " [ first failure here ]";

        std::cerr << "\n";
    }

    std::exit(1);
}
// Sweeps the four-argument test_assemble128() over a grid of parameters:
// every destination offset in [0,64), every source offset in [0,64) and every
// n in [0,200), all at a fixed stride of 512.  One "." is written to stderr
// per destination offset as a progress indicator, followed by "pass" once the
// whole sweep has completed.
static void test_assemble128()
{
    const int fixed_stride = 512;   // OK to fix this to a large power of 2
    const int n_end = 200;          // Fairly large, in anticipation of 64-byte AVX512 kernels
    const int dst_off_end = 64;     // also in anticipation of 64-byte kernels
    const int src_off_end = 64;     // also in anticipation of 64-byte kernels

    std::cerr << "test_assemble128";

    for (int doff = 0; doff < dst_off_end; ++doff) {
        std::cerr << ".";

        for (int soff = 0; soff < src_off_end; ++soff) {
            for (int len = 0; len < n_end; ++len) {
                test_assemble128(fixed_stride, len, doff, soff);
            }
        }
    }

    std::cerr << "pass\n";
}
// -------------------------------------------------------------------------------------------------
// Checks _sum16_auto_correlations() against _sum16_auto_correlations_reference()
// on 1,000,000 randomly generated 16-byte inputs.
//
// For each input, both kernels are called and their (sum, count) outputs are
// compared.  On the first disagreement, the input bytes and both result pairs
// are written to stderr and the process exits with status 1.  If every
// iteration agrees, a "pass" line is written to stderr.
static void test_sum16_auto_correlations()
{
    for (int n = 0; n < 1000000; n++) {
        std::vector<uint8_t> data = randvec(16);

        // Private copy for the kernel under test, so that each kernel reads its
        // own buffer (same pattern as the per-kernel copies in test_assemble128).
        // Previously this copy was made but never used.
        std::vector<uint8_t> data2 = data;

        int sum1, count1;
        _sum16_auto_correlations_reference(sum1, count1, &data[0]);

        int sum2, count2;
        _sum16_auto_correlations(sum2, count2, &data2[0]);

        if ((sum1 == sum2) && (count1 == count2))
            continue;

        std::cerr << "test_sum16_auto_correlations() failed\n";
        std::cerr << " data = [";

        for (int i = 0; i < 16; i++)
            std::cerr << " " << std::hex << static_cast<unsigned int>(data[i]) << std::dec;

        std::cerr << " ]\n"
                  << " sum1=" << sum1 << " count1=" << count1 << "\n"
                  << " sum2=" << sum2 << " count2=" << count2 << "\n";

        std::exit(1);
    }

    std::cerr << "test_sum16_auto_correlations: pass\n";
}
// -------------------------------------------------------------------------------------------------
// Entry point: runs the kernel tests in sequence and returns 0 once both have
// returned.  Command-line arguments are not used.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    test_sum16_auto_correlations();
    test_assemble128();

    return 0;
}