-
Notifications
You must be signed in to change notification settings - Fork 0
/
raw_module.cu
213 lines (177 loc) · 8.26 KB
/
raw_module.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
extern "C" {
__global__ void atomic_1d(const float* pos,
const long long npoints, const long long a,
const double sampling, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints)
return ;
float diff_x = (pos[m] - pos[n]) * sampling;
int offset_x = __double2int_rd((a-1)*sampling);
int res_x = __double2int_rd(diff_x + offset_x);
if ((res_x <= 0 || res_x >= 2*offset_x))
return;
atomicAdd(&output[res_x], 1.);
}
__global__ void atomic_2d(const float* pos_x, const float* pos_y,
const long long npoints, const long long a, const long long b,
const double sampling, float* integ, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints)
return ;
int pos_xm = __double2int_rd(pos_x[m]);
int pos_ym = __double2int_rd(pos_y[m]);
int pos_xn = __double2int_rd(pos_x[n]);
int pos_yn = __double2int_rd(pos_y[n]);
if (n == 0) {
atomicAdd(&integ[pos_xm * b + pos_ym], 1.);
}
long long size = 2*b-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
float diff_x = (pos_xm - pos_xn) * sampling;
float diff_y = (pos_ym - pos_yn) * sampling;
int res_x = diff_x + offset_x;
int res_y = diff_y + offset_y;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b))
return;
atomicAdd(&output[res_x * size + res_y], 1.);
}
__global__ void atomic_sim(const float* pos_x, const float* pos_y,
const long long npoints, const long long a, const long long b,
const double sampling, float* sfr, float* integ, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints)
return ;
int pos_xm = __double2int_rd(pos_x[m]);
int pos_ym = __double2int_rd(pos_y[m]);
int pos_xn = __double2int_rd(pos_x[n]);
int pos_yn = __double2int_rd(pos_y[n]);
float fm = sfr[pos_xm * b + pos_ym];
float fn = sfr[pos_xn * b + pos_yn];
if (n == 0) {
atomicAdd(&integ[pos_xm * b + pos_ym], 1.);
}
long long size = 2*b-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
float diff_x = (pos_xm - pos_xn) * sampling;
float diff_y = (pos_ym - pos_yn) * sampling;
int res_x = diff_x + offset_x;
int res_y = diff_y + offset_y;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b))
return;
atomicAdd(&output[res_x * size + res_y], fm*fn);
}
__global__ void cross_2d(const float* pos1_x, const float* pos1_y, const
float* pos2_x, const float* pos2_y, const long long npoints, const long
long npoints2, const long long a, const long long b, const double sampling, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints2)
return ;
int pos1_xm = __double2int_rd(pos1_x[m]);
int pos1_ym = __double2int_rd(pos1_y[m]);
int pos2_xn = __double2int_rd(pos2_x[n]);
int pos2_yn = __double2int_rd(pos2_y[n]);
long long size = 2*b-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
float diff_x = ((pos1_xm) - (pos2_xn)) * sampling;
float diff_y = ((pos1_ym) - (pos2_yn)) * sampling;
int res_x = diff_x + offset_x;
int res_y = diff_y + offset_y;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b))
return;
atomicAdd(&output[(res_x) * size + res_y], 1.);
}
__global__ void cinteg_3d(const float* pos_x, const float* pos_y, const float* pos_z, const long long npoints, const long long a, const long long b, const long long c, const double sampling, float* integ, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints)
return ;
float fm = integ[(__double2int_rd(pos_x[m]) * b + __double2int_rd(pos_y[m]))*c + __double2int_rd(pos_z[m])];
float fn = integ[(__double2int_rd(pos_x[n]) * b + __double2int_rd(pos_y[n]))*c + __double2int_rd(pos_z[n])];
long long size_y = 2*b-1 ;
long long size_z = 2*c-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
int offset_z = __double2int_rd((c-1)*sampling);
float diff_x = ((pos_x[m]) - (pos_x[n])) * sampling;
float diff_y = ((pos_y[m]) - (pos_y[n])) * sampling;
float diff_z = ((pos_z[m]) - (pos_z[n])) * sampling;
int res_x = __double2int_rd(diff_x) + offset_x;
int res_y = __double2int_rd(diff_y) + offset_y;
int res_z = __double2int_rd(diff_z) + offset_z;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b) || (res_z < 0 || res_z >= 2*c))
return;
atomicAdd(&output[((res_x) * size_y + res_y)*size_z + res_z], fm*fn);
}
__global__ void atomic_3d(const float* pos_x, const float* pos_y, const float* pos_z, const long long npoints, const long long c0, const long long c1, const long long a, const long long b, const long long cz, const double sampling, float* integ, float* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints)
return ;
int pos_xm = __double2int_rd(pos_x[m]);
int pos_ym = __double2int_rd(pos_y[m]);
int pos_zm = __double2int_rd(pos_z[m]);
int pos_xn = __double2int_rd(pos_x[n]);
int pos_yn = __double2int_rd(pos_y[n]);
int pos_zn = __double2int_rd(pos_z[n]);
if (n == 0) {
atomicAdd(&integ[((pos_xm+c0) * b + pos_ym + c1) * cz + pos_zm], 1.);
}
long long size_y = 2*b-1 ;
long long size_z = 2*cz-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
int offset_z = __double2int_rd((cz-1)*sampling);
float diff_x = ((pos_xm) - (pos_xn)) * sampling;
float diff_y = ((pos_ym) - (pos_yn)) * sampling;
float diff_z = ((pos_zm) - (pos_zn)) * sampling;
int res_x = __double2int_rd(diff_x) + offset_x;
int res_y = __double2int_rd(diff_y) + offset_y;
int res_z = __double2int_rd(diff_z) + offset_z;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b) || (res_z < 0 || res_z >= 2*cz))
return;
atomicAdd(&output[((res_x) * size_y + res_y)*size_z + res_z], 1.);
}
__global__ void cross_3d(const float* pos1_x, const float* pos1_y, const float* pos1_z, const float* pos2_x, const float* pos2_y, const float* pos2_z,
const long long npoints, const long long npoints2, const long long a, const long long b, const long long c, const double sampling, double* output) {
long long m = blockDim.x * blockIdx.x + threadIdx.x;
long long n = blockDim.y * blockIdx.y + threadIdx.y;
if (m >= npoints || n >= npoints2)
return ;
int pos1_xm = __double2int_rd(pos1_x[m]);
int pos1_ym = __double2int_rd(pos1_y[m]);
int pos1_zm = __double2int_rd(pos1_z[m]);
int pos2_xn = __double2int_rd(pos2_x[n]);
int pos2_yn = __double2int_rd(pos2_y[n]);
int pos2_zn = __double2int_rd(pos2_z[n]);
long long size_y = 2*b-1 ;
long long size_z = 2*c-1 ;
int offset_x = __double2int_rd((a-1)*sampling);
int offset_y = __double2int_rd((b-1)*sampling);
int offset_z = __double2int_rd((c-1)*sampling);
float diff_x = ((pos1_xm) - (pos2_xn)) * sampling;
float diff_y = ((pos1_ym) - (pos2_yn)) * sampling;
float diff_z = ((pos1_zm) - (pos2_zn)) * sampling;
int res_x = __double2int_rd(diff_x) + offset_x;
int res_y = __double2int_rd(diff_y) + offset_y;
int res_z = __double2int_rd(diff_z) + offset_z;
if ((res_x < 0 || res_x >= 2*a) || (res_y < 0 || res_y >= 2*b) || (res_z < 0 || res_z >= 2*c))
return;
atomicAdd(&output[((res_x) * size_y + res_y)*size_z + res_z], 1.);
}
__global__
void thresh_frame(double *frame, const long long npix, const double threshold, const double *mask) {
int pix = blockIdx.x * blockDim.x + threadIdx.x ;
if (pix >= npix)
return ;
if (frame[pix] < threshold)
frame[pix] = 0 ;
frame[pix] *= mask[pix] ;
}
}