-
Notifications
You must be signed in to change notification settings - Fork 0
/
cg.c
141 lines (119 loc) · 3.76 KB
/
cg.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include "common.h"
#include "blas.h"
#include "devMem.h"
void cg(int n, real_type nnz,
int *ia, //matrix csr data
int *ja,
real_type *a,
real_type *x, //solution vector, mmust be alocated prior to calling
real_type *b, //rhs
real_type tol, //DONT MULTIPLY BY NORM OF B
pdata* prec_data, //preconditioner data: all Ls, Us etc
int maxit,
int *it, //output: iteration
int *flag, //output: flag 0-converged, 1-maxit reached, 2-catastrophic failure
real_type *res_norm_history //output: residual norm history
){
#if (CUDA || HIP)
real_type *r;
real_type *w;
real_type *p;
real_type *q;
r = (real_type*) mallocForDevice(r, n, sizeof(real_type));
w = (real_type*) mallocForDevice(w, n, sizeof(real_type));
p = (real_type*) mallocForDevice(p, n, sizeof(real_type));
q = (real_type*) mallocForDevice(q, n, sizeof(real_type));
#if HIP
vec_zero(n, w);
vec_zero(n, r);
vec_zero(n, p);
vec_zero(n, q);
#endif
#else
real_type *r = (real_type *) calloc (n, sizeof(real_type));
real_type *w = (real_type *) calloc (n, sizeof(real_type));
real_type *p = (real_type *) calloc (n, sizeof(real_type));
real_type *q = (real_type *) calloc (n, sizeof(real_type));
#endif
real_type alpha, beta, tolrel, rho_current, rho_previous, pTq;
real_type one = 1.0;
real_type zero = 0.0;
int notconv = 1, iter = 0;
//compute initial norm of r
/* r = A*x */
csr_matvec(n, nnz, ia, ja, a, x, r, &one, &zero, "A");
/* r = -b +r = Ax-b */
axpy(n, -1.0, b, r);
// printf("Norm of r %e \n", dot(n, r,r));
/* r=(-1.0)*r */
scal(n, -1.0, r);
/* norm of r */
res_norm_history[0] = dot(n, r,r);
res_norm_history[0] = sqrt(res_norm_history[0]);
tolrel = tol * res_norm_history[0];
printf("CG: it %d, res norm %5.5e \n",0, res_norm_history[0]);
while (notconv){
// printf("Norm of X before prec %16.16e \n", dot(n, r,r));
#if HIP
vec_zero(n, w);
#endif
prec_function(ia, ja, a, nnz, prec_data, r, w);
// printf("Norm of X after prec %16.16e \n", dot(n, w,w));
/* rho_current = r'*w; */
rho_current = dot(n, r, w);
if (iter == 0) {
vec_copy(n, w, p);
} else {
beta = rho_current/rho_previous;
// printf("scaling by beta = %5.5e, rho_current = %5.5e, rho_previous = %5.5e \n", beta, rho_current, rho_previous);
/* p = w+bet*p; */
scal(n, beta, p);
axpy(n, 1.0, w, p);
}
/* q = As*p; */
csr_matvec(n, nnz, ia, ja, a, p, q, &one, &zero, "A");
/* alpha = rho_current/(p'*q);*/
// printf("p'*p = %16.16e, q'*q = %16.16f \n",dot(n, p,p), dot(n, q,q) );
pTq = dot(n, p, q);
alpha = rho_current / pTq;
// printf("p^Tq = %5.15e,rho_current = %5.15e, alpha = %5.15e \n", pTq, rho_current, alpha);
/* x = x + alph*p; */
axpy(n, alpha, p, x);
/* r = r - alph*q; */
axpy(n, (-1.0) * alpha, q, r );
/* norm of r */
iter++;
res_norm_history[iter] = dot(n, r,r);
res_norm_history[iter] = sqrt(res_norm_history[iter]);
printf("CG: it %d, res norm %5.5e \n",iter, res_norm_history[iter]);
/* check convergence */
if ((res_norm_history[iter]) < tolrel) {
*flag = 0;
notconv = 0;
*it = iter;
/* r = A*x */
csr_matvec(n, nnz, ia, ja, a, x, r, &one, &zero, "A");
/* r = -b +r = Ax-b */
axpy(n, -1.0, b, r);
printf("TRUE Norm of r %5.5e relative %16.16e\n", sqrt(dot(n, r,r)), sqrt(dot(n, r,r))/sqrt(dot(n, b,b)));
} else {
if (iter > maxit){
*flag = 1;
notconv = 0;
*it = iter;
}
}
rho_previous = rho_current;
}//while
#if (CUDA || HIP)
freeDevice(r);
freeDevice(w);
freeDevice(p);
freeDevice(q);
#else
free(r);
free(w);
free(p);
free(q);
#endif
} /* cg */