From 667386dcc6e2446115312e2eb2c22901b84817f8 Mon Sep 17 00:00:00 2001 From: Christina Hedges Date: Thu, 2 Jan 2025 17:35:04 -0500 Subject: [PATCH] added configuration parameter --- src/lksearch/MASTSearch.py | 51 +++++++++++++++++++++++++---------- src/lksearch/__init__.py | 8 ++++++ src/lksearch/catalogsearch.py | 32 +++++++++++++--------- 3 files changed, 65 insertions(+), 26 deletions(-) diff --git a/src/lksearch/MASTSearch.py b/src/lksearch/MASTSearch.py index e2fcecd..ab2ef29 100644 --- a/src/lksearch/MASTSearch.py +++ b/src/lksearch/MASTSearch.py @@ -1112,6 +1112,28 @@ def _download_one( # check to see if a cloud_uri exists, if so we just pass that download = True + if not conf.CHECK_CACHED_FILE_SIZES: + # If this configuration parameter is set and the file exists + # in the cache, we do not search for it + local_path = "/".join( + [ + config.get_cache_dir(), + "mastDownload", + row["obs_collection"], + row["obs_id"], + row["productFilename"], + ] + ) + if os.path.isfile(local_path): + manifest = pd.DataFrame( + { + "Local Path": [local_path], + "Status": ["UNKNOWN"], + "Message": [None], + "URL": [None], + } + ) + return manifest if not conf.DOWNLOAD_CLOUD: if pd.notna(row["cloud_uri"]): download = False @@ -1189,18 +1211,19 @@ def download( ] manifest = pd.concat(manifest) - status = manifest["Status"] != "COMPLETE" - if np.any(status): - warnings.warn( - "Not All Files Downloaded Successfully, Check Returned Manifest.", - SearchWarning, - ) - if remove_incomplete: - for file in manifest.loc[status]["Local Path"].values: - if os.path.isfile(file): - os.remove(file) - warnings.warn(f"Removed {file}", SearchWarning) - else: - warnings.warn(f"Not a file: {file}", SearchWarning) - manifest = manifest.reset_index(drop=True) + if conf.CHECK_CACHED_FILE_SIZES: + status = manifest["Status"] != "COMPLETE" + if np.any(status): + warnings.warn( + "Not All Files Downloaded Successfully, Check Returned Manifest.", + SearchWarning, + ) + if remove_incomplete: + for file 
in manifest.loc[status]["Local Path"].values: + if os.path.isfile(file): + os.remove(file) + warnings.warn(f"Removed {file}", SearchWarning) + else: + warnings.warn(f"Not a file: {file}", SearchWarning) + manifest = manifest.reset_index(drop=True) return manifest diff --git a/src/lksearch/__init__.py b/src/lksearch/__init__.py index 0b2d66d..229f864 100644 --- a/src/lksearch/__init__.py +++ b/src/lksearch/__init__.py @@ -77,6 +77,14 @@ class Conf(_config.ConfigNamespace): cfgtype="boolean", ) + CHECK_CACHED_FILE_SIZES = _config.ConfigItem( + True, + "Whether to send requests to check that the size of files in the cache matches the expected online file. " + "If False, lksearch will assume files within the cache are complete and will not check their file size. " + "Setting to False will create a modest speed up when retrieving paths for cached files, but will be less robust.", + cfgtype="boolean", + ) + conf = Conf() log = logging.getLogger("lksearch") diff --git a/src/lksearch/catalogsearch.py b/src/lksearch/catalogsearch.py index 2c46c75..55dad13 100644 --- a/src/lksearch/catalogsearch.py +++ b/src/lksearch/catalogsearch.py @@ -254,47 +254,55 @@ def query_id( max_results = len(np.atleast_1d(search_object)) if output_catalog is not None and input_catalog is not None: - if output_catalog != input_catalog: + if output_catalog.lower() != input_catalog.lower(): max_results = max_results * 10 - if input_catalog in np.atleast_1d( - _Catalog_Dictionary[output_catalog]["crossmatch_catalogs"] + if input_catalog.lower() in np.atleast_1d( + _Catalog_Dictionary[output_catalog.lower()]["crossmatch_catalogs"] ): - if _Catalog_Dictionary[output_catalog]["crossmatch_type"] == "tic": + if ( + _Catalog_Dictionary[output_catalog.lower()]["crossmatch_type"] + == "tic" + ): # TIC is is crossmatched with gaiadr3/kic # If KIC data for a gaia source or vice versa is desired # search TIC to get KIC/gaia ids then Search KIC /GAIA source_id_column = _Catalog_Dictionary["tic"][ "crossmatch_column_id" - 
][input_catalog] + ][input_catalog.lower()] new_id_table = _query_id( "tic", id_list, max_results, id_column=source_id_column ) id_list = ", ".join( new_id_table[ _Catalog_Dictionary["tic"]["crossmatch_column_id"][ - output_catalog + output_catalog.lower() ] ].astype(str) # .values ) - if _Catalog_Dictionary[output_catalog]["crossmatch_type"] == "column": + if ( + _Catalog_Dictionary[output_catalog.lower()]["crossmatch_type"] + == "column" + ): # TIC is is crossmatched with gaiadr3/kic # If we want TIC Info for a gaiadr3/KIC source - match appropriate column in TIC - id_column = _Catalog_Dictionary[output_catalog][ + id_column = _Catalog_Dictionary[output_catalog.lower()][ "crossmatch_column_id" - ][input_catalog] + ][input_catalog.lower()] else: raise ValueError( - f"{input_catalog} does not have crossmatched IDs with {output_catalog}. {output_catalog} can be crossmatched with {_Catalog_Dictionary[catalog]['crossmatch_catalogs']}" + f"{input_catalog} does not have crossmatched IDs with {output_catalog}. {output_catalog} can be crossmatched with {_Catalog_Dictionary[output_catalog.lower()]['crossmatch_catalogs']}" ) else: if output_catalog is None: output_catalog = _default_catalog - results_table = _query_id(output_catalog, id_list, max_results, id_column=id_column) + results_table = _query_id( + output_catalog.lower(), id_list, max_results, id_column=id_column + ) if return_skycoord: return _table_to_skycoord( - results_table, output_epoch=output_epoch, catalog=output_catalog + results_table, output_epoch=output_epoch, catalog=output_catalog.lower() ) else: return results_table.to_pandas()