Skip to content

Commit

Permalink
Relax taxonomy retrieval to enable loading data from Release 1 to 5
Browse files — browse the repository at this point in the history
  • Loading branch information
tcezard committed Nov 3, 2023
1 parent f07e38d commit 0edf527
Showing 1 changed file with 14 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -199,11 +199,12 @@ def __init__(self, private_config_xml_file, config_profile, release_version, log

@lru_cache
def get_taxonomy_and_scientific_name(self, species_folder):
# TODO: Restore this function to only retrieve the taxonomy and scientific name using the taxonomy table in release 6
query = (
f"select distinct c.taxonomy, t.scientific_name "
f"from eva_progress_tracker.clustering_release_tracker c "
f"join evapro.taxonomy t on c.taxonomy=t.taxonomy_id "
f"where release_version={self.release_version} AND release_folder_name='{species_folder}'"
f"where release_folder_name='{species_folder}'"
)
with get_metadata_connection_handle(self.config_profile, self.private_config_xml_file) as db_conn:
results = get_all_results_for_query(db_conn, query)
Expand All @@ -212,10 +213,21 @@ def get_taxonomy_and_scientific_name(self, species_folder):
query = (
f"select distinct taxonomy, scientific_name "
f"from eva_progress_tracker.clustering_release_tracker "
f"where release_version={self.release_version} AND release_folder_name='{species_folder}'"
f"where release_folder_name='{species_folder}'"
)
with get_metadata_connection_handle(self.config_profile, self.private_config_xml_file) as db_conn:
results = get_all_results_for_query(db_conn, query)
if len(results) < 1:
# Support for directory from release 1
if species_folder.split('_')[-1].isdigit():
taxonomy = int(species_folder.split('_')[-1])
query = (
f"select distinct taxonomy, scientific_name "
f"from eva_progress_tracker.clustering_release_tracker "
f"where taxonomy={taxonomy}"
)
with get_metadata_connection_handle(self.config_profile, self.private_config_xml_file) as db_conn:
results = get_all_results_for_query(db_conn, query)
if len(results) < 1:
logger.warning(f'Failed to get scientific name and taxonomy for {species_folder}')
return None, None
Expand Down

0 comments on commit 0edf527

Please sign in to comment.