From 55e7c0ad3608763b542e8e985f85300e11b761f0 Mon Sep 17 00:00:00 2001
From: Mitchell Revalski <56605543+mrevalski@users.noreply.github.com>
Date: Wed, 11 Dec 2024 12:06:45 -0500
Subject: [PATCH] updated functions in mast_api_psf
---
.../hst_point_spread_function.ipynb | 18 +-
.../point_spread_function/mast_api_psf.py | 277 +++++++++++++-----
2 files changed, 220 insertions(+), 75 deletions(-)
diff --git a/notebooks/WFC3/point_spread_function/hst_point_spread_function.ipynb b/notebooks/WFC3/point_spread_function/hst_point_spread_function.ipynb
index 32ea72e52..a2f1e2512 100644
--- a/notebooks/WFC3/point_spread_function/hst_point_spread_function.ipynb
+++ b/notebooks/WFC3/point_spread_function/hst_point_spread_function.ipynb
@@ -984,7 +984,7 @@
"\n",
"***\n",
"\n",
- "The example above demonstrates how to extract and stack stars from a provided exposure. However, in many cases there may be an insufficient number of stars to create a high-quality median stack. In those cases, users can utilize the MAST cutout database described in [WFC3 ISR 2021-12](https://www.stsci.edu/files/live/sites/www/files/home/hst/instrumentation/wfc3/documentation/instrument-science-reports-isrs/_documents/2021/ISR_2021_12.pdf), with access instructions provided on the [PSF Search webpage](https://www.stsci.edu/hst/instrumentation/wfc3/data-analysis/psf/psf-search). The MAST API commands are listed on [this webpage](https://mast.stsci.edu/api/v0/pyex.html#MastCatalogsFilteredWfc3PsfUvisPy), with parameter options on [this webpage](https://mast.stsci.edu/api/v0/_w_f_c3__p_s_ffields.html). The examples below utilize sections of code from the `download_psf_cutouts.ipynb` developed by Dauphin et al. (*in preparation*). In this case, we set limits on the x and y detector locations, quality of fit, exposure time, isolation index, integrated flux (in electrons for WFC3/UVIS and WFPC2, and in electrons per second for WFC3/IR), the central pixel flux, sky flux, and exclude subarrays. The stellar centroids provided in the MAST database were calculated using hst1pass and the empirical PSF models described in [Section 2.2](#empirical). See [WFC3 ISR 2021-12](https://www.stsci.edu/files/live/sites/www/files/home/hst/instrumentation/wfc3/documentation/instrument-science-reports-isrs/_documents/2021/ISR_2021_12.pdf) for additional information.\n",
+ "The example above demonstrates how to extract and stack stars from a provided exposure. However, in many cases there may be an insufficient number of stars to create a high-quality median stack. In those cases, users can utilize the MAST cutout database described in [WFC3 ISR 2021-12](https://www.stsci.edu/files/live/sites/www/files/home/hst/instrumentation/wfc3/documentation/instrument-science-reports-isrs/_documents/2021/ISR_2021_12.pdf), with access instructions provided on the [PSF Search webpage](https://www.stsci.edu/hst/instrumentation/wfc3/data-analysis/psf/psf-search). The MAST API commands are listed on [this webpage](https://mast.stsci.edu/api/v0/pyex.html#MastCatalogsFilteredWfc3PsfUvisPy), with parameter options on [this webpage](https://mast.stsci.edu/api/v0/_w_f_c3__p_s_ffields.html). The examples below utilize sections of code from the `download_psf_cutouts.ipynb` developed by Dauphin et al. (2024) that is available on [Github](https://spacetelescope.github.io/hst_notebooks/notebooks/WFC3/mast_api_psf/download_psf_cutouts.html). In this case, we set limits on the x and y detector locations, quality of fit, exposure time, isolation index, integrated flux (in electrons for WFC3/UVIS and WFPC2, and in electrons per second for WFC3/IR), the central pixel flux, sky flux, and exclude subarrays. The stellar centroids provided in the MAST database were calculated using hst1pass and the empirical PSF models described in [Section 2.2](#empirical). See [WFC3 ISR 2021-12](https://www.stsci.edu/files/live/sites/www/files/home/hst/instrumentation/wfc3/documentation/instrument-science-reports-isrs/_documents/2021/ISR_2021_12.pdf) for additional information.\n",
"\n",
"
NOTE: We explicitly assume that users have familiarized themselves with the contents of the \"download_psf_cutouts.ipynb\" notebook for this section.
"
]
@@ -1032,7 +1032,7 @@
"}\n",
"\n",
"filts = mast_api_psf.set_filters(parameters)\n",
- "columns = ['id', 'rootname', 'filter', 'chip', 'exptime', 'psf_x_center', 'psf_y_center', 'pixc', 'sky', 'qfit', 'iso_index', 'subarray']\n",
+ "columns = ['id', 'rootname', 'filter', 'chip', 'exptime', 'psf_x_center', 'psf_y_center', 'pixc', 'sky', 'qfit', 'iso_index', 'subarray', 'x_raw', 'y_raw', 'x_cal', 'y_cal']\n",
"obs = mast_api_psf.mast_query_psf_database(detector=detector, filts=filts, columns=columns)\n",
"obs"
]
@@ -1042,7 +1042,7 @@
"id": "55d3e120-393d-45e4-937a-0d9f6f7613ef",
"metadata": {},
"source": [
- "As described and detailed in the `download_psf_cutouts.ipynb`, the below cell constructs the filepaths for the cutouts, requests to download them from the MAST cutout service, and then extracts the files from a compressed tar folder. Finally, the filepaths for each cutout are saved to a list in `path_data` and passed to an array."
+ "As described and detailed in the [`download_psf_cutouts.ipynb`](https://spacetelescope.github.io/hst_notebooks/notebooks/WFC3/mast_api_psf/download_psf_cutouts.html), the below cell constructs the filepaths for the cutouts, requests to download them from the MAST cutout service, and then extracts the files from a compressed tar folder. Finally, the filepaths for each cutout are saved to a list in `path_data` and passed to an array."
]
},
{
@@ -1055,7 +1055,8 @@
"os.chdir(data_dir)\n",
"file_suffix = ['flc']\n",
"dataURIs = mast_api_psf.make_dataURIs(obs, detector=detector, file_suffix=file_suffix)\n",
- "filename = mast_api_psf.download_request(dataURIs, filename='mastDownload.tar.gz', download_type='bundle.tar.gz')\n",
+ "# filename = mast_api_psf.download_request(dataURIs, filename='mastDownload.tar.gz', download_type='bundle.tar.gz')\n",
+ "filename = mast_api_psf.download_request_bundle(dataURIs, filename='mastDownload.tar.gz')\n",
"tar = tarfile.open(filename, 'r:gz')\n",
"path_mast = tar.getnames()[0]\n",
"tar.extractall()\n",
@@ -1502,7 +1503,7 @@
"\n",
"**Author:** Mitchell Revalski
\n",
"**Created:** 15 Apr 2024
\n",
- "**Updated:** 05 Jun 2024
\n",
+ "**Updated:** 11 Dec 2024
\n",
"**Source:** [https://github.com/spacetelescope/hst_notebooks](https://github.com/spacetelescope/hst_notebooks)\n",
"\n",
"\n",
@@ -1528,6 +1529,11 @@
"* [Citing `matplotlib`](https://matplotlib.org/stable/users/project/citing.html)\n",
"* [Citing `numpy`](https://numpy.org/citing-numpy/)\n",
"* [Citing `photutils`](https://photutils.readthedocs.io/en/stable/getting_started/citation.html)\n",
+ "\n",
+ "\n",
+ "### Version History\n",
+ "- 05 Jun 2024: First release of the `hst_point_spread_function.ipynb` notebook, utilizing `astropy v6.0.1`, `numpy v1.26.4`, and `photutils v1.12.0`.\n",
+ "- 11 Dec 2024: Updated the functions in `mast_api_psf.py`, and the corresponding function calls in the notebook, to match those published in [download_psf_cutouts.ipynb](https://spacetelescope.github.io/hst_notebooks/notebooks/WFC3/mast_api_psf/download_psf_cutouts.html).\n",
"***\n",
"\n",
"[Top of Page](#top)\n",
@@ -1552,7 +1558,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.7"
+ "version": "3.11.9"
}
},
"nbformat": 4,
diff --git a/notebooks/WFC3/point_spread_function/mast_api_psf.py b/notebooks/WFC3/point_spread_function/mast_api_psf.py
index 117fb750a..d235e5bf5 100644
--- a/notebooks/WFC3/point_spread_function/mast_api_psf.py
+++ b/notebooks/WFC3/point_spread_function/mast_api_psf.py
@@ -16,12 +16,20 @@
Author
------
-Fred Dauphin, February 2024
+Fred Dauphin, July 2024
"""
+import datetime
+import multiprocessing
+import os
import requests
+
+import tqdm
+from astropy.io import fits
from astroquery.mast import Mast
+REQUEST_URL_PREFIX = 'https://mast.stsci.edu/api/v0.1/Download'
+
# Helper functions from https://mast.stsci.edu/api/v0/pyex.html
def set_filters(parameters):
@@ -42,37 +50,121 @@ def set_min_max(min, max):
return [{'min': min, 'max': max}]
-def download_request(payload, filename, download_type="file"):
+# Downloading functions
+def download_request_file(dataURI_filename):
+ """
+ Performs a get request to download a specified file from the MAST server.
+
+ This function is intended for downloading single cutouts. The load and
+ download limits for a single query are 50,000 and 500,000, respectively.
+ The file is intended to be downloaded as a .fits file.
+
+ Parameters
+ ----------
+ dataURI_filename : tuple or list
+ The (dataURI, filename) pair to download and save as a fits file.
+ This is one parameter instead of two so a progress bar can be applied
+ to multiprocessing.
+
+ Returns
+ -------
+ filename : str
+ The name of the downloaded file.
+ """
+ dataURI = dataURI_filename[0]
+ filename = dataURI_filename[1]
+
+ # Specify download type
+ download_type = 'file'
+ request_url = f'{REQUEST_URL_PREFIX}/{download_type}'
+
+ # Request payload
+ payload = {'uri': dataURI}
+ resp = requests.get(request_url, params=payload)
+
+ # Write response to filename
+ with open(filename, 'wb') as FLE:
+ FLE.write(resp.content)
+
+ return filename
+
+
+def download_request_pool(dataURIs, cpu_count=0):
+ """
+ Performs get requests to download the specified files from the MAST server.
+
+ This function is intended for downloading multiple cutouts. The load and
+ download limits for a single query are 50,000 and 500,000, respectively.
+ This function is optimized by pooling and shows a progress bar.
+
+ Parameters
+ ----------
+ dataURIs : list
+ The dataURIs to be downloaded.
+
+ cpu_count : int, default=0
+ The number of cpus for multiprocessing. If 0, set to all available cpus.
+
+ Returns
+ -------
+ path_dir : str
+ The directory path to the downloaded cutouts.
+ """
+ # Make PSF directory if necessary for downloads
+ now = datetime.datetime.now().strftime('MAST_%Y-%m-%dT%H%M')
+ if 'WFC3' in dataURIs[0]:
+ ins_psf = 'WFC3PSF'
+ else:
+ ins_psf = 'WFPC2PSF'
+ path_dir = f'{now}/{ins_psf}'
+ if not os.path.isdir(path_dir):
+ os.makedirs(path_dir)
+
+ # Prepare arguments for pooling
+ filenames = [f'{path_dir}/{dataURI.split("/")[-1]}' for dataURI in dataURIs]
+ args = zip(dataURIs, filenames)
+
+ # Pool using a progress bar
+ if cpu_count == 0:
+ cpu_count = os.cpu_count()
+ total = len(filenames)
+ pool = multiprocessing.Pool(processes=cpu_count)
+ _ = list(tqdm.tqdm(pool.imap(download_request_file, args), total=total))
+ pool.close()
+ pool.join()
+
+ return path_dir
+
+
+def download_request_bundle(dataURIs, filename):
"""
Performs a get request to download a specified file from the MAST server.
- The load and download limits for a single query are 50,000 and 500,000,
- respectively. It is recommended to download all files as a .tar.gz:
- ```
- download_requests(payload=payload,
- filename='filename.tar.gz',
- download_type='bundle.tar.gz')
- ```
+ This function is intended for downloading multiple cutouts. The load and
+ download limits for a single query are 50,000 and 500,000, respectively.
+ The file downloaded is a .tar.gz.
Parameters
----------
- payload : list
+ dataURIs : list
The dataURIs to be downloaded.
filename : str
- The name of the downloaded file. To download a .tar.gz (recommended),
- include '.tar.gz' as the file extension.
- download_type : str, default="file"
- The type of file to download. To download a .tar.gz (recommended), use
- 'bundle.tar.gz'.
+ The name of the downloaded '.tar.gz' file.
Returns
-------
filename : str
The name of the downloaded file.
"""
- request_url = 'https://mast.stsci.edu/api/v0.1/Download/' + download_type
+ # Specify download type
+ download_type = 'bundle.tar.gz'
+ request_url = f'{REQUEST_URL_PREFIX}/{download_type}'
+
+ # Request payload
+ payload = [("uri", dataURI) for dataURI in dataURIs]
resp = requests.post(request_url, data=payload)
-
+
+ # Write response to filename
with open(filename, 'wb') as FLE:
FLE.write(resp.content)
@@ -110,28 +202,27 @@ def mast_query_psf_database(detector, filts, columns=['*']):
columns applied.
"""
# Check types
- if type(detector) is not str:
+ if not isinstance(detector, str):
raise TypeError('detector must be a string.')
- if type(filts) is not list:
+ if not isinstance(filts, list):
raise TypeError('filts must be a list.')
- if type(columns) is not list:
+ if not isinstance(columns, list):
raise TypeError('columns must be a list.')
- # Check detectors
- valid_detectors = ['UVIS', 'IR', 'WFPC2']
+ # Determine service for database
detector = detector.upper()
- if detector not in valid_detectors:
+ service_base = 'Mast.Catalogs.Filtered'
+ detector_databases = {
+ 'UVIS': 'Wfc3Psf.Uvis',
+ 'IR': 'Wfc3Psf.Ir',
+ 'WFPC2': 'Wfpc2Psf.Uvis'
+ }
+ try:
+ database = detector_databases[detector]
+ except KeyError:
+ valid_detectors = list(detector_databases.keys())
raise ValueError(f'{detector} is not a valid detector. '
f'Choose from {valid_detectors}.')
-
- # Determine service for database
- service_base = 'Mast.Catalogs.Filtered'
- if detector == 'UVIS':
- database = 'Wfc3Psf.Uvis'
- elif detector == 'IR':
- database = 'Wfc3Psf.Ir'
- else:
- database = 'Wfpc2Psf.Uvis'
service = f'{service_base}.{database}'
# If WFPC2, change filter to filter_1
@@ -157,7 +248,7 @@ def mast_query_psf_database(detector, filts, columns=['*']):
return obs
-def make_dataURIs(obs, detector, file_suffix, sizes={'unsat': 51, 'sat': 101}):
+def make_dataURIs(obs, detector, file_suffix, unsat_size=51, sat_size=101):
"""
Make dataURIs for the WFC3 and WFPC2 PSF databases' sources.
@@ -178,21 +269,21 @@ def make_dataURIs(obs, detector, file_suffix, sizes={'unsat': 51, 'sat': 101}):
The detector of the queried sources. Allowed values are UVIS, IR, and
WFPC2.
file_suffix : list
- The file suffixes to prepare for download.
- sizes : dict, default={'unsat':51, 'sat':101}
- The sizes for unsaturated (qfit>0;n_sat_pixels==0) and saturated
- (qfit==0;n_sat_pixels>0) cutouts.
+ The file suffixes to prepare for download. Allowed values are raw, d0m,
+ flt, c0m, and flc.
+ unsat_size : int, default=51
+ The size for unsaturated (qfit>0;n_sat_pixels==0) cutouts.
+ sat_size : int, default=101
+ The size for saturated (qfit==0;n_sat_pixels>0) cutouts.
Returns
-------
dataURIs : list
- The dataURIs made from the queried sources as ('uri', dataURI).
+ The dataURIs made from the queried sources.
"""
# Check type
- if type(file_suffix) is not list:
+ if not isinstance(file_suffix, list):
raise TypeError('detector must be a list.')
- if type(sizes) is not dict:
- raise TypeError('sizes must be a dictionary.')
# Check suffixes (make sure there isn't a wrong suffix)
valid_suffixes = ['raw', 'd0m', 'flt', 'c0m', 'flc']
@@ -200,13 +291,6 @@ def make_dataURIs(obs, detector, file_suffix, sizes={'unsat': 51, 'sat': 101}):
if suffix not in valid_suffixes:
raise ValueError(f'{suffix} is not a valid suffix. '
f'Choose from {valid_suffixes}.')
-
- # Check sizes (make sure unsat and sat are in sizes)
- valid_sizes = ['unsat', 'sat']
- for size in valid_sizes:
- if size not in sizes.keys():
- raise ValueError(f'{size} needs to be included. '
- f'Choose an appropriate value.')
# Determine database that was queried
detector = detector.upper()
@@ -219,9 +303,7 @@ def make_dataURIs(obs, detector, file_suffix, sizes={'unsat': 51, 'sat': 101}):
# Loop through obs to make dataURIs
dataURIs = []
- pixel_offset = 1 # centers sources
- mask_full_frame = (obs['subarray'] == 0).data # only support full frame
- for row in obs[mask_full_frame]:
+ for row in tqdm.tqdm(obs, total=len(obs)):
# Unpack values
iden = row['id']
root = row['rootname']
@@ -229,35 +311,92 @@ def make_dataURIs(obs, detector, file_suffix, sizes={'unsat': 51, 'sat': 101}):
filt = row['filter_1']
else:
filt = row['filter']
- x = row['psf_x_center'] - pixel_offset
- y = row['psf_y_center'] - pixel_offset
chip = row['chip']
qfit = row['qfit']
if qfit > 0:
- size = sizes['unsat']
+ size = unsat_size
else:
- size = sizes['sat']
-
- # If UVIS use chip to asign correct sci ext
+ size = sat_size
+ subarray = row['subarray']
+
+ # If UVIS use chip to assign correct fits ext
if detector == 'UVIS':
- if chip == '2':
- sci_ext = 1
- elif chip == '1':
- sci_ext = 4
- if y >= 2051:
- y -= 2051 - 3 # another offset to center UVIS1 sources
- # Else chip is the correct sci ext
+ if chip == '1' and subarray == 0:
+ fits_ext = 4
+ else:
+ fits_ext = 1
+ # Else chip is the correct fits ext
else:
- sci_ext = chip
+ fits_ext = chip
# Make dataURIs for each suffix
for suffix in file_suffix:
- file_read = f'red={root}_{suffix}[{sci_ext}]'
+ if suffix in ['raw', 'd0m']:
+ coord_suffix = 'raw'
+ else:
+ coord_suffix = 'cal'
+ x = row[f'x_{coord_suffix}']
+ y = row[f'y_{coord_suffix}']
+
+ file_read = f'red={root}_{suffix}[{fits_ext}]'
cutout = f'size={size}&x={x}&y={y}&format=fits'
file_save = f'{root}_{iden}_{filt}_{suffix}_cutout.fits'
dataURI = f'{dataURI_base}?{file_read}&{cutout}/{file_save}'
- dataURIs.append(("uri", dataURI))
+ dataURIs.append(dataURI)
- n_subarray_sources = (~mask_full_frame).sum()
- print(f'Found {n_subarray_sources} subarray sources in queried data.')
return dataURIs
+
+
+def convert_dataURIs_to_dataURLs(dataURIs):
+ """
+ Convert dataURIs to URLs for the WFC3 and WFPC2 PSF databases' sources.
+
+ Use the archive url, the hla folder, and the imagename parameter.
+
+ Parameters
+ ----------
+ dataURIs : list
+ The dataURIs made from the queried sources.
+
+ Returns
+ -------
+ dataURLs : list
+ The dataURLs for the queried sources.
+ """
+ # Convert to dataURLs
+ dataURL_base = 'https://archive.stsci.edu/cgi-bin/hla'
+ dataURLs = []
+ for dataURI in tqdm.tqdm(dataURIs, total=len(dataURIs)):
+ dataURL_split = dataURI.split('/')
+ file_cutout = f'{dataURL_split[3]}&imagename={dataURL_split[4]}'
+ dataURL = f'{dataURL_base}/{file_cutout}'
+ dataURLs.append(dataURL)
+ return dataURLs
+
+
+def extract_cutouts_pool(dataURLs, cpu_count=0):
+ """
+ Extract cutouts from dataURLs using multiprocessing.
+
+ Parameters
+ ----------
+ dataURLs : list
+ The dataURLs made from the queried sources.
+ cpu_count : int, default=0
+ The number of cpus for multiprocessing. If 0, set to all available cpus.
+
+ Returns
+ -------
+ cutouts : list
+ The queried sources.
+ """
+ # Pool using a progress bar
+ if cpu_count == 0:
+ cpu_count = os.cpu_count()
+ total = len(dataURLs)
+ pool = multiprocessing.Pool(processes=cpu_count)
+ cutouts = list(tqdm.tqdm(pool.imap(fits.getdata, dataURLs), total=total))
+ pool.close()
+ pool.join()
+
+ return cutouts
\ No newline at end of file