-
Notifications
You must be signed in to change notification settings - Fork 1
/
converter.py
342 lines (302 loc) · 14.4 KB
/
converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""Tools for dataset conversion to IDF"""
import ftplib
import logging
import os
import os.path
import re
import shutil
import subprocess
import tarfile
import tempfile
from contextlib import closing
from ..base import ConversionError, ConversionManager, Converter, ParameterSelector
AUXILIARY_PATH = os.path.join(os.path.expanduser('~'), '.geospaas', 'auxiliary')
logger = logging.getLogger(__name__)
class IDFConversionManager(ConversionManager):
    """Manager for IDF conversion"""

    # Set to True once the auxiliary files have been downloaded by this
    # process, so that later calls skip the download.
    downloaded_aux = False

    @staticmethod
    def make_symlink(source, destination):
        """Create a symbolic link at `destination` pointing to `source`.

        If `destination` is an existing regular file or directory, it
        is deleted and replaced with the symlink. If `destination` is
        already a symbolic link, it is left untouched, whatever it
        points to.
        """
        if os.path.islink(destination):
            return
        if os.path.isdir(destination):
            shutil.rmtree(destination)
        elif os.path.isfile(destination):
            os.remove(destination)
        os.symlink(source, destination)

    @classmethod
    def download_auxiliary_files(cls, auxiliary_path):
        """Download the auxiliary files necessary for IDF conversion.
        They are too big to be included in the package.

        The download only happens if it has not already been done by
        this process and `auxiliary_path` does not exist yet. In all
        cases, a symlink named 'auxiliary' pointing to `auxiliary_path`
        is created next to this module if it is missing.

        Any exception raised while downloading or extracting the
        archive is re-raised after `auxiliary_path` has been removed.
        """
        if not (cls.downloaded_aux or os.path.isdir(auxiliary_path)):
            logger.info("Downloading auxiliary files for IDF conversion, this may take a while")
            os.makedirs(auxiliary_path)
            try:
                with closing(ftplib.FTP('ftp.nersc.no')) as ftp:
                    ftp.login()
                    # we write the archive to a tmp file...
                    with tempfile.TemporaryFile() as tmp_file:
                        ftp.retrbinary(
                            'RETR /pub/Adrien/idf_converter_auxiliary.tar', tmp_file.write)
                        # ...then set the cursor back at the beginning of
                        # the file...
                        tmp_file.seek(0)
                        # ...and finally extract the contents of the
                        # archive to the auxiliary folder
                        # NOTE(review): extractall() trusts the member
                        # paths of the archive; consider an extraction
                        # filter if the FTP server is not fully trusted.
                        with tarfile.TarFile(fileobj=tmp_file) as tar_file:
                            tar_file.extractall(auxiliary_path)
            except BaseException:
                # Whatever went wrong (network error, corrupted archive,
                # user interruption...), remove everything: the mere
                # existence of the folder makes later calls skip the
                # download, so a partial folder must never be left behind.
                shutil.rmtree(auxiliary_path, ignore_errors=True)
                raise
            cls.downloaded_aux = True
        cls.make_symlink(auxiliary_path, os.path.join(os.path.dirname(__file__), 'auxiliary'))

    def __init__(self, *args, **kwargs):
        """Initialize the manager.

        The optional `download_auxiliary` keyword argument (True by
        default) controls whether the auxiliary files are fetched. It
        is removed from `kwargs` before the parent constructor is
        called.
        """
        download_auxiliary = kwargs.pop('download_auxiliary', True)
        super().__init__(*args, **kwargs)
        if download_auxiliary:  # pragma: no cover
            self.download_auxiliary_files(AUXILIARY_PATH)
class IDFConverter(Converter):
    """Base class for IDF converters. Uses the idf_converter package
    from ODL for the actual conversion. The child classes deal with
    the configuration files and gathering the results
    """
    PARAMETERS_DIR = os.path.join(os.path.dirname(__file__), 'parameters')
    # Default selector which never matches any dataset; child classes
    # override this attribute with their own selectors.
    PARAMETER_SELECTORS = (ParameterSelector(matches=lambda d: False, parameter_files=tuple()),)

    def __init__(self, parameter_files):
        """`parameter_files` is an iterable of file names located in
        PARAMETERS_DIR. The conversion is run once for each of them.
        """
        self.parameter_paths = [
            os.path.join(self.PARAMETERS_DIR, parameter_file) for parameter_file in parameter_files
        ]

    @staticmethod
    def _output_to_text(output):
        """Return the captured output of a subprocess as text"""
        if output is None:
            return ''
        if isinstance(output, bytes):
            # capture_output without text mode gives bytes; decode them
            # so that messages do not contain a bytes representation
            return output.decode(errors='replace')
        return str(output)

    def run(self, in_file, out_dir, **kwargs):
        """Run the IDF converter on `in_file` once for each parameter
        file and move the results to `out_dir`.

        Returns the list of results gathered by `move_results()` for
        all parameter files.
        Raises a ConversionError if the idf-converter command fails or
        reports that the file was skipped.
        """
        input_cli_args = ['-i', 'path', '=', in_file]

        results = []
        for parameter_path in self.parameter_paths:
            logger.debug(
                "Converting %s to IDF using parameter file %s", in_file, parameter_path)

            with tempfile.TemporaryDirectory() as tmp_dir:
                output_cli_args = ['-o', 'path', '=', tmp_dir]

                try:
                    # run the idf-converter tool. The output is in a temporary directory
                    process = subprocess.run(
                        ['idf-converter', f"{parameter_path}@", *input_cli_args, *output_cli_args],
                        cwd=os.path.dirname(__file__), check=True, capture_output=True
                    )
                except subprocess.CalledProcessError as error:
                    raise ConversionError(
                        "Conversion failed with the following message: "
                        f"{self._output_to_text(error.stderr)}") from error

                # if the file was skipped, raise an exception
                stderr = self._output_to_text(process.stderr)
                if 'Skipping this file' in stderr:
                    raise ConversionError((
                        f"Could not convert {os.path.basename(in_file)}\n{stderr}: "
                        "the file was skipped by the idf-converter"))

                # at this point it is safe to assume that the
                # conversion went well. We move the results to
                # the permanent output directory
                results.extend(self.move_results(tmp_dir, out_dir))

        return results
class MultiFilesIDFConverter(IDFConverter):
    """Base class for converters which need to run the conversion on
    multiple files
    """

    @staticmethod
    def list_files_to_convert(dataset_file_path):
        """Returns the list of dataset paths on which the converter
        needs to be called. Must be implemented by child classes.
        """
        raise NotImplementedError

    def run(self, in_file, out_dir, **kwargs):
        """Calls the IDFConverter.run() method on all dataset files
        returned by list_files_to_convert() and returns the
        concatenation of the results.

        Raises a ConversionError if there are no files to convert.
        """
        subdatasets = self.list_files_to_convert(in_file)
        if not subdatasets:
            raise ConversionError(f"No files to convert were found for {in_file}")

        results = []
        for dataset_file in subdatasets:
            # forward the keyword arguments rather than dropping them
            results.extend(super().run(dataset_file, out_dir, **kwargs))
        return results
@IDFConversionManager.register()
class Sentinel1IDFConverter(MultiFilesIDFConverter):
    """IDF converter for Sentinel-1 datasets"""

    PARAMETER_SELECTORS = (
        # entry IDs like S1A_xx_OCN... or S1B_xx_OCN...
        ParameterSelector(
            matches=lambda d: re.match('^S1[AB]_[A-Z0-9]{2}_OCN.*$', d.entry_id),
            parameter_files=('sentinel1_l2_rvl',),
        ),
        # entry IDs like S1A_xx_GRD..., S1A_xx_SLC... (or S1B)
        ParameterSelector(
            matches=lambda d: re.match('^S1[AB]_[A-Z0-9]{2}_(GRD|SLC).*$', d.entry_id),
            parameter_files=('sentinel1_l1',),
        ),
    )

    @staticmethod
    def list_files_to_convert(dataset_file_path):
        """Returns the paths of all the entries found in the
        'measurement' directory of the dataset.

        Raises a ConversionError if this directory does not exist or
        is not a directory.
        """
        measurement_dir = os.path.join(dataset_file_path, 'measurement')
        try:
            entries = os.listdir(measurement_dir)
        except (FileNotFoundError, NotADirectoryError) as error:
            raise ConversionError(
                f"Could not find a measurement directory inside {dataset_file_path}") from error
        return [os.path.join(measurement_dir, entry) for entry in entries]
@IDFConversionManager.register()
class Sentinel2IDFConverter(MultiFilesIDFConverter):
    """IDF converter for Sentinel-2 datasets"""

    PARAMETER_SELECTORS = (
        # entry IDs like S2A_MSIL1C... or S2B_MSIL1C...
        ParameterSelector(
            matches=lambda d: re.match('^S2[AB]_MSIL1C.*$', d.entry_id),
            parameter_files=('sentinel2_l1',),
        ),
    )

    @staticmethod
    def list_files_to_convert(dataset_file_path):
        """Returns the paths of the entries of the dataset's 'GRANULE'
        directory whose name starts with 'L1C'.

        Raises a ConversionError if this directory does not exist or
        is not a directory.
        """
        granule_dir = os.path.join(dataset_file_path, 'GRANULE')
        try:
            entries = os.listdir(granule_dir)
        except (FileNotFoundError, NotADirectoryError) as error:
            raise ConversionError(
                f"Could not find a GRANULE directory inside {dataset_file_path}") from error
        return [
            os.path.join(granule_dir, entry)
            for entry in entries
            if entry.startswith('L1C')
        ]
@IDFConversionManager.register()
class Sentinel3SLSTRL2WSTIDFConverter(MultiFilesIDFConverter):
    """IDF converter for Sentinel 3 SLSTR L2 WST datasets"""

    PARAMETER_SELECTORS = (
        # entry IDs like S3A_SL_2_WST... or S3B_SL_2_WST...
        ParameterSelector(
            matches=lambda d: re.match('^S3[AB]_SL_2_WST.*$', d.entry_id),
            parameter_files=('sentinel3_slstr_l2_wst',),
        ),
    )

    @staticmethod
    def list_files_to_convert(dataset_file_path):
        """Returns the paths of the entries of the dataset directory
        whose name ends with '.nc'.

        Raises a ConversionError if the dataset path does not exist or
        is not a directory.
        """
        try:
            entries = os.listdir(dataset_file_path)
        except (FileNotFoundError, NotADirectoryError) as error:
            raise ConversionError(
                f"Could not find any dataset files in {dataset_file_path}") from error
        return [
            os.path.join(dataset_file_path, entry)
            for entry in entries
            if entry.endswith('.nc')
        ]
@IDFConversionManager.register()
class SingleResultIDFConverter(IDFConverter):
    """IDF converter for readers which produce a single output folder
    """

    # Each selector associates a test on the dataset's entry_id with the
    # parameter file(s) to use for the datasets which pass this test.
    PARAMETER_SELECTORS = (
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('nrt_global_allsat_phy_l4_'),
            parameter_files=('cmems_008_046',)),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('GL_TS_DC_'),
            parameter_files=('cmems_013_048_drifter_0m', 'cmems_013_048_drifter_15m')),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('GL_TS_DB_'),
            parameter_files=('cmems_013_030_drifter_0m', 'cmems_013_030_drifter_15m')),
        ParameterSelector(
            matches=lambda d: re.match(
                r'^mercatorbiomer4v2r1_global_mean_[0-9]{8}$', d.entry_id),
            parameter_files=('cmems_001_028_daily_mean_0.5m',)),
        ParameterSelector(
            # raw string: the pattern contains backslash escapes
            matches=lambda d: re.match(
                r'^D[0-9]{3}-ESACCI-L4_GHRSST-SSTdepth-OSTIA-GLOB_CDR2\.1-v02\.0-fv01\.0$',
                d.entry_id),
            parameter_files=('esa_cci_sst',)),
        ParameterSelector(
            matches=lambda d: d.entry_id.endswith('-JPL-L2P_GHRSST-SSTskin-MODIS_A-D-v02.0-fv01.0'),
            parameter_files=('ghrsst_l2p_modis_a_day',)),
        ParameterSelector(
            matches=lambda d: d.entry_id.endswith('-JPL-L2P_GHRSST-SSTskin-MODIS_A-N-v02.0-fv01.0'),
            parameter_files=('ghrsst_l2p_modis_a_night',)),
        ParameterSelector(
            matches=lambda d: '-JPL-L2P_GHRSST-SSTskin-VIIRS' in d.entry_id,
            parameter_files=('ghrsst_l2p_viirs_jpl_sst',)),
        ParameterSelector(
            matches=lambda d: '-NAVO-L2P_GHRSST-SST1m-VIIRS' in d.entry_id,
            parameter_files=('ghrsst_l2p_viirs_navo_sst',)),
        ParameterSelector(
            matches=lambda d: ('OSPO-L2P_GHRSST-SSTsubskin-VIIRS' in d.entry_id or
                               '-STAR-L2P_GHRSST-SSTsubskin-VIIRS' in d.entry_id),
            parameter_files=('ghrsst_l2p_viirs_ospo_sst',)),
        ParameterSelector(
            matches=lambda d: '-OSISAF-L3C_GHRSST-SSTsubskin-AVHRR_SST_METOP_B_GLB-' in d.entry_id,
            parameter_files=('ghrsst_l3c_avhrr_metop_b_sst',)),
        ParameterSelector(
            matches=lambda d: '-STAR-L3C_GHRSST-SSTsubskin-ABI_G16-' in d.entry_id,
            parameter_files=('ghrsst_l3c_goes16_sst',)),
        ParameterSelector(
            matches=lambda d: '-STAR-L3C_GHRSST-SSTsubskin-ABI_G17-' in d.entry_id,
            parameter_files=('ghrsst_l3c_goes17_sst',)),
        ParameterSelector(
            matches=lambda d: '-OSISAF-L3C_GHRSST-SSTsubskin-SEVIRI_SST-' in d.entry_id,
            parameter_files=('ghrsst_l3c_seviri_atlantic_sst',)),
        ParameterSelector(
            matches=lambda d: '-OSISAF-L3C_GHRSST-SSTsubskin-SEVIRI_IO_SST-' in d.entry_id,
            parameter_files=('ghrsst_l3c_seviri_indian_sst',)),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('hycom_glb_sfc_u_'),
            parameter_files=('hycom_osu',)),
        ParameterSelector(
            matches=lambda d: '/rtofs_glo_2ds_' in d.entry_id and d.entry_id.endswith('_diag'),
            parameter_files=('rtofs_diagnostic',)),
        ParameterSelector(
            matches=lambda d: '/rtofs_glo_2ds_' in d.entry_id and d.entry_id.endswith('_prog'),
            parameter_files=('rtofs_prognostic',)),
        ParameterSelector(
            matches=lambda d: re.match('^S3[AB]_OL_1_EFR.*$', d.entry_id),
            parameter_files=('sentinel3_olci_l1_efr',)),
        ParameterSelector(
            matches=lambda d: re.match('^S3[AB]_OL_2_WFR.*$', d.entry_id),
            parameter_files=('sentinel3_olci_chl',)),
        ParameterSelector(
            matches=lambda d: re.match('^S3[AB]_SL_1_RBT.*$', d.entry_id),
            parameter_files=('sentinel3_slstr_sst',)),
    )
@IDFConversionManager.register()
class MultiResultFoldersIDFConverter(IDFConverter):
    """IDF converter for CMEMS readers which produce multiple result
    folders from one dataset file
    """

    # Each selector pairs a test on the dataset's entry_id with the
    # parameter file(s) used for the datasets which pass this test.
    PARAMETER_SELECTORS = (
        # selectors based on the beginning of the entry_id
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('mercatorpsy4v3r1_gl12_hrly'),
            parameter_files=('cmems_001_024_hourly_mean_surface',),
        ),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('SMOC_'),
            parameter_files=('cmems_001_024_hourly_smoc',),
        ),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('dataset-uv-nrt-hourly_'),
            parameter_files=('cmems_015_003_0m', 'cmems_015_003_15m'),
        ),
        ParameterSelector(
            matches=lambda d: d.entry_id.startswith('GL_TV_HF_'),
            parameter_files=('cmems_013_048_radar_total',),
        ),
        # selectors based on a substring of the entry_id
        ParameterSelector(
            matches=lambda d: '-REMSS-L3U_GHRSST-SSTsubskin-AMSR2-' in d.entry_id,
            parameter_files=('ghrsst_l3u_amsr2_sst',),
        ),
        ParameterSelector(
            matches=lambda d: '-REMSS-L3U_GHRSST-SSTsubskin-GMI-' in d.entry_id,
            parameter_files=('ghrsst_l3u_gmi_sst',),
        ),
        ParameterSelector(
            matches=lambda d: 'CMEMS_v5r1_IBI_PHY_NRT_PdE_01hav_' in d.entry_id,
            parameter_files=('ibi_hourly_mean_surface',),
        ),
        ParameterSelector(
            matches=lambda d: '_hts-CMCC--RFVL-MFSeas6-MEDATL-' in d.entry_id,
            parameter_files=('mfs_med-cmcc-cur',),
        ),
        ParameterSelector(
            matches=lambda d: '_hts-CMCC--ASLV-MFSeas6-MEDATL-' in d.entry_id,
            parameter_files=('mfs_med-cmcc-ssh',),
        ),
        ParameterSelector(
            matches=lambda d: '_hts-CMCC--TEMP-MFSeas6-MEDATL-' in d.entry_id,
            parameter_files=('mfs_med-cmcc-temp',),
        ),
        ParameterSelector(
            matches=lambda d: 'mfwamglocep_' in d.entry_id,
            parameter_files=('meteofrance_model_mfwam',),
        ),
    )