-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.c
242 lines (194 loc) · 5.26 KB
/
utils.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#include "utils.h"
#include <stdio.h>
#include <math.h>
int get_size_with_padding(const int n)
{
#if defined(__AVX512F__)
// Round up n to the next multiple of 8.
return 8 * ((n + 7) / 8);
#else
// Round up n to the next multiple of 4.
return 4 * ((n + 3) / 4);
#endif
}
void copy_matrix(
double ***in_blocks, int ldin,
double ***out_blocks, int ldout,
partitioning_t *p,
memory_layout_t layout)
{
int num_blk_rows = p->num_blk_rows;
int num_blk_cols = p->num_blk_cols;
int *first_row = p->first_row;
int *first_col = p->first_col;
// Copy out_blocks := in_blocks.
for (int j = 0; j < num_blk_cols; j++) {
for (int i = 0; i < num_blk_rows; i++) {
const int num_rows = first_row[i + 1] - first_row[i];
const int num_cols = first_col[j + 1] - first_col[j];
// Copy block out_i,j := in_i,j.
if (layout == COLUMN_MAJOR) {
copy_block(num_rows, num_cols, in_blocks[i][j], ldin,
out_blocks[i][j], ldout);
}
else { // TILE_LAYOUT
copy_block(num_rows, num_cols, in_blocks[i][j], num_rows,
out_blocks[i][j], num_rows);
}
}
}
}
void scale(
int n, double *restrict const x, const scaling_t *beta)
{
#ifdef INTSCALING
double alpha = ldexp(1.0, beta[0]);
#else
double alpha = beta[0];
#endif
// Scale vector, if necessary.
if (alpha != 1.0) {
for (int i = 0; i < n; i++) {
x[i] = alpha * x[i];
}
}
}
void scale_tile(int m, int n,
double *restrict const X, int ldX, const scaling_t *beta)
{
#ifdef INTSCALING
double alpha = ldexp(1.0, beta[0]);
#else
double alpha = beta[0];
#endif
#define X(i,j) X[(i) + (j) * ldX]
// Scale vector, if necessary.
if (alpha != 1.0) {
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
X(i,j) = alpha * X(i,j);
}
}
}
#undef X
}
// Scale everything that is not enclosed in (ilo:ihi,jlo:jhi)
void scale_excluding(int m, int n, int ilo, int ihi, int jlo, int jhi,
double *restrict const X, int ldX, const scaling_t *beta)
{
#ifdef INTSCALING
double alpha = ldexp(1.0, beta[0]);
#else
double alpha = beta[0];
#endif
#define X(i,j) X[(i) + (j) * ldX]
// Scale vector, if necessary.
if (alpha != 1.0) {
for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
if ((ilo <= i && i <= ihi) && (jlo <= j && j <= jhi)) {
// Skip. We are in the area that is to be spared.
}
else {
X(i,j) = alpha * X(i,j);
}
}
}
}
#undef X
}
/// Convert int scaling factor to double-precision scaling, if necessary.
double convert_scaling(scaling_t alpha)
{
#ifdef INTSCALING
double scaling = ldexp(1.0, alpha);
#else
double scaling = alpha;
#endif
return scaling;
}
void init_scaling_factor(int n, scaling_t *restrict const alpha)
{
#ifdef INTSCALING
for (int i = 0; i < n; i++)
alpha[i] = 0;
#else
for (int i = 0; i < n; i++)
alpha[i] = 1.0;
#endif
}
/**
* @brief Compute common scaling factor alpha_min / alpha.
*/
double compute_upscaling(scaling_t alpha_min, scaling_t alpha)
{
double scaling;
#ifdef INTSCALING
// Common scaling is 2^alpha_min / 2^alpha.
scaling_t exp = alpha_min - alpha;
scaling = ldexp(1.0, exp);
#else
scaling = alpha_min / alpha;
#endif
return scaling;
}
/**
* @brief Compute common scaling factor (alpha_min / alpha) * beta.
*/
double compute_combined_upscaling(
scaling_t alpha_min, scaling_t alpha, scaling_t beta)
{
double scaling;
#ifdef INTSCALING
// Common scaling is (2^alpha_min / 2^alpha) * 2^beta.
scaling_t exp = alpha_min - alpha + beta;
scaling = ldexp(1.0, exp);
#else
scaling = (alpha_min / alpha) * beta;
#endif
return scaling;
}
void update_global_scaling(scaling_t *global, scaling_t phi)
{
#ifdef INTSCALING
*global = phi + (*global);
#else
*global = phi * (*global);
#endif
}
void update_norm(double *norm, scaling_t phi)
{
#ifdef INTSCALING
*norm = ldexp(1.0, phi) * (*norm);
#else
*norm = phi * (*norm);
#endif
}
void dgemm(
const char transa, const char transb,
const int m, const int n, const int k,
const double alpha, const double *restrict const A, const int ldA,
const double *restrict const B, const int ldB,
const double beta, double *restrict const C, const int ldC)
{
extern void dgemm_(
const char *transa, const char *transb,
const int *m, const int *n, const int *k,
const double *alpha, const double *a, const int *lda,
const double *b, const int *ldb,
const double *beta, double *c, const int *ldc);
dgemm_(&transa, &transb,
&m, &n, &k,
&alpha, A, &ldA,
B, &ldB,
&beta, C, &ldC);
}
double dlange(const char norm, const int m, const int n,
const double *restrict const A, const int ldA)
{
double work;
extern double dlange_(const char *norm, const int *m, const int *n,
const double *a, const int *ldA, double *work);
double res = dlange_(&norm, &m, &n, A, &ldA, &work);
return res;
}