-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathscrub_user_name_and_make_summary_files.py
93 lines (77 loc) · 3.24 KB
/
scrub_user_name_and_make_summary_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import glob
import os
from astropy.io import fits
from astropy.table import Table, vstack, hstack
from subprocess import call
from tqdm import tqdm
# Folder holding the gzipped GZ3D FITS files to scrub, and the folder that
# receives the scrubbed copies plus the CSV summary files.
input_folder = '/Volumes/Work/GZ3D/Data_run_final_fits'
output_folder = '/Volumes/Work/GZ3D/Data_run_final_fits_no_user'

# Read the MANGAID_1 column of the catalogue's first extension.  The context
# manager closes the file even if reading the table raises.
with fits.open('/Volumes/Work/GZ3D/FinalMaNGAgalaxies_bundlesize_NSA_HIflag.fits') as observed_data_hdu_list:
    observed_data = Table(observed_data_hdu_list[1].data)
    # strip whitespace from ids; a set is used because the ids are only ever
    # tested for membership (once per input file)
    observed_ids = {raw_id.strip() for raw_id in observed_data['MANGAID_1']}

# Each counter stays 0 until the matching CSV has been created; after that
# rows are appended without repeating the header.
metadata_counter = 0
centers_counter = 0
stars_counter = 0

# The CSV and FITS writers do not create missing directories themselves.
os.makedirs(output_folder, exist_ok=True)
output_metadata = os.path.join(output_folder, 'gz3d_metadata.csv')
output_centers = os.path.join(output_folder, 'galaxy_centers.csv')
output_stars = os.path.join(output_folder, 'foreground_stars.csv')

# glob returns files in arbitrary order; sorting keeps the row order of the
# summary files reproducible between runs.
fits_list = sorted(glob.glob(os.path.join(input_folder, '*fits.gz')))
# Columns removed from the centers / stars tables before they are written to
# the summary CSV files.
_DROPPED_POSITION_COLUMNS = ['x', 'y', 'var_x', 'var_y', 'var_x_y', 'var_ra', 'var_dec', 'var_ra_dec']


def _summarise_positions(table, file_name):
    """Tag *table* with its source file name, drop the unwanted columns and return a DataFrame."""
    table.add_column(file_name, name='file_name', index=0)
    table.remove_columns(_DROPPED_POSITION_COLUMNS)
    return table.to_pandas()


def _write_csv_rows(frame, path, first_write):
    """Create *path* with a header on the first write, otherwise append rows without one."""
    if first_write:
        # make new file
        frame.to_csv(path, mode='w', index=False)
    else:
        # append to existing file
        frame.to_csv(path, mode='a', index=False, header=False)


for f in tqdm(fits_list):
    # drop the trailing '.gz' so the output is written as a plain FITS file
    file_name = os.path.basename(f)[:-3]
    # the subject id is the text after the last '_' and before the first '.'
    subject_id = file_name.split('_')[-1].split('.')[0]

    # the context manager closes the input file even if a step below raises
    with fits.open(f) as hdu_list:
        # create tables for summary files
        metadata = Table(hdu_list[5].data)
        manga_id = metadata['MANGAID'][0].strip()

        centers = Table(hdu_list[6].data)
        if len(centers) > 0:
            centers = _summarise_positions(centers, file_name)
            _write_csv_rows(centers, output_centers, centers_counter == 0)
            centers_counter = 1

        stars = Table(hdu_list[7].data)
        if len(stars) > 0:
            stars = _summarise_positions(stars, file_name)
            _write_csv_rows(stars, output_stars, stars_counter == 0)
            stars_counter = 1

        # per-file bookkeeping columns, placed in front of the file's own metadata
        new_metadata_columns = Table({
            'file_name': [file_name],
            'zooniverse_subject_id': [subject_id],
            'number_galaxy_centers': [len(centers)],
            'number_foreground_stars': [len(stars)],
            'observed': [manga_id in observed_ids]
        })
        new_metadata = hstack([new_metadata_columns, metadata]).to_pandas()
        _write_csv_rows(new_metadata, output_metadata, metadata_counter == 0)
        metadata_counter = 1

        # strip user_name and user_id
        # NOTE(review): rebuilding each HDU from a bare Table presumably drops
        # the original extension header (e.g. EXTNAME) - confirm that is fine.
        for hdu_index in [8, 9, 10]:
            raw_table = Table(hdu_list[hdu_index].data)
            raw_table.remove_columns(['user_name', 'user_id'])
            hdu_list[hdu_index] = fits.table_to_hdu(raw_table)

        output_name = os.path.join(output_folder, file_name)
        # The summary CSVs are recreated on every run, so leftovers from an
        # earlier run are replaced here too instead of aborting the run.
        hdu_list.writeto(output_name, overwrite=True)

    # compress the fits file; '-f' replaces a '.gz' left by an earlier run
    return_code = call(['gzip', '-f', output_name])
    if return_code != 0:
        # do not silently leave an uncompressed file in the output folder
        raise RuntimeError(
            'gzip failed with exit status {0} for {1}'.format(return_code, output_name)
        )