Skip to content

Commit

Permalink
added configuration parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
christinahedges committed Jan 2, 2025
1 parent 154493f commit 667386d
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 26 deletions.
51 changes: 37 additions & 14 deletions src/lksearch/MASTSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,28 @@ def _download_one(
# check to see if a cloud_uri exists, if so we just pass that

download = True
if not conf.CHECK_CACHED_FILE_SIZES:
# If this configuration parameter is set and the file exists
# in the cache, we do not search for it
local_path = "/".join(
[
config.get_cache_dir(),
"mastDownload",
row["obs_collection"],
row["obs_id"],
row["productFilename"],
]
)
if os.path.isfile(local_path):
manifest = pd.DataFrame(
{
"Local Path": [local_path],
"Status": ["UNKNOWN"],
"Message": [None],
"URL": [None],
}
)
return manifest
if not conf.DOWNLOAD_CLOUD:
if pd.notna(row["cloud_uri"]):
download = False
Expand Down Expand Up @@ -1189,18 +1211,19 @@ def download(
]

manifest = pd.concat(manifest)
status = manifest["Status"] != "COMPLETE"
if np.any(status):
warnings.warn(
"Not All Files Downloaded Successfully, Check Returned Manifest.",
SearchWarning,
)
if remove_incomplete:
for file in manifest.loc[status]["Local Path"].values:
if os.path.isfile(file):
os.remove(file)
warnings.warn(f"Removed {file}", SearchWarning)
else:
warnings.warn(f"Not a file: {file}", SearchWarning)
manifest = manifest.reset_index(drop=True)
if conf.CHECK_CACHED_FILE_SIZES:
status = manifest["Status"] != "COMPLETE"
if np.any(status):
warnings.warn(
"Not All Files Downloaded Successfully, Check Returned Manifest.",
SearchWarning,
)
if remove_incomplete:
for file in manifest.loc[status]["Local Path"].values:
if os.path.isfile(file):
os.remove(file)
warnings.warn(f"Removed {file}", SearchWarning)
else:
warnings.warn(f"Not a file: {file}", SearchWarning)
manifest = manifest.reset_index(drop=True)
return manifest
8 changes: 8 additions & 0 deletions src/lksearch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ class Conf(_config.ConfigNamespace):
cfgtype="boolean",
)

# Controls whether files already present in the local cache are re-validated.
# True (default): requests are sent to check the cached file size against the
# expected online file. False: a file found in the cache is assumed complete
# and its path is returned without that check (faster, but less robust).
# NOTE: each fragment of the description ends with a trailing space because
# adjacent string literals are concatenated verbatim.
CHECK_CACHED_FILE_SIZES = _config.ConfigItem(
    True,
    "Whether to send requests to check the size of files in the cache match the expected online file. "
    "If False, lksearch will assume files within the cache are complete and will not check their file size. "
    "Setting to False will create a modest speed up to retrieving paths for cached files, but will be less robust.",
    cfgtype="boolean",
)


conf = Conf()
log = logging.getLogger("lksearch")
Expand Down
32 changes: 20 additions & 12 deletions src/lksearch/catalogsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,47 +254,55 @@ def query_id(
max_results = len(np.atleast_1d(search_object))

if output_catalog is not None and input_catalog is not None:
if output_catalog != input_catalog:
if output_catalog.lower() != input_catalog.lower():
max_results = max_results * 10
if input_catalog in np.atleast_1d(
_Catalog_Dictionary[output_catalog]["crossmatch_catalogs"]
if input_catalog.lower() in np.atleast_1d(
_Catalog_Dictionary[output_catalog.lower()]["crossmatch_catalogs"]
):
if _Catalog_Dictionary[output_catalog]["crossmatch_type"] == "tic":
if (
_Catalog_Dictionary[output_catalog.lower()]["crossmatch_type"]
== "tic"
):
# TIC is crossmatched with gaiadr3/kic
# If KIC data for a gaia source or vice versa is desired
# search TIC to get KIC/gaia ids then Search KIC /GAIA
source_id_column = _Catalog_Dictionary["tic"][
"crossmatch_column_id"
][input_catalog]
][input_catalog.lower()]
new_id_table = _query_id(
"tic", id_list, max_results, id_column=source_id_column
)
id_list = ", ".join(
new_id_table[
_Catalog_Dictionary["tic"]["crossmatch_column_id"][
output_catalog
output_catalog.lower()
]
].astype(str)
# .values
)
if _Catalog_Dictionary[output_catalog]["crossmatch_type"] == "column":
if (
_Catalog_Dictionary[output_catalog.lower()]["crossmatch_type"]
== "column"
):
# TIC is crossmatched with gaiadr3/kic
# If we want TIC Info for a gaiadr3/KIC source - match appropriate column in TIC
id_column = _Catalog_Dictionary[output_catalog][
id_column = _Catalog_Dictionary[output_catalog.lower()][
"crossmatch_column_id"
][input_catalog]
][input_catalog.lower()]
else:
raise ValueError(
f"{input_catalog} does not have crossmatched IDs with {output_catalog}. {output_catalog} can be crossmatched with {_Catalog_Dictionary[catalog]['crossmatch_catalogs']}"
f"{input_catalog} does not have crossmatched IDs with {output_catalog}. {output_catalog} can be crossmatched with {_Catalog_Dictionary[output_catalog.lower()]['crossmatch_catalogs']}"
)
else:
if output_catalog is None:
output_catalog = _default_catalog

results_table = _query_id(output_catalog, id_list, max_results, id_column=id_column)
results_table = _query_id(
output_catalog.lower(), id_list, max_results, id_column=id_column
)
if return_skycoord:
return _table_to_skycoord(
results_table, output_epoch=output_epoch, catalog=output_catalog
results_table, output_epoch=output_epoch, catalog=output_catalog.lower()
)
else:
return results_table.to_pandas()
Expand Down

0 comments on commit 667386d

Please sign in to comment.