From d556d086e4c05952344f08cff2da53ea4c376770 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 9 May 2024 12:09:13 +0100 Subject: [PATCH] Couple of bug fixes and sync with config file --- .../run_release_in_embassy/__init__.py | 25 ------------- .../release_common_utils.py | 5 ++- .../run_release_for_assembly.nf | 24 ++++++------ .../run_release_for_assembly.py | 5 ++- .../run_release_for_species.py | 37 +++++++++++-------- 5 files changed, 40 insertions(+), 56 deletions(-) diff --git a/eva-accession-release-automation/run_release_in_embassy/__init__.py b/eva-accession-release-automation/run_release_in_embassy/__init__.py index 00e90d898..e69de29bb 100644 --- a/eva-accession-release-automation/run_release_in_embassy/__init__.py +++ b/eva-accession-release-automation/run_release_in_embassy/__init__.py @@ -1,25 +0,0 @@ -# Copyright 2020 EMBL - European Bioinformatics Institute -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import sys - - -def init_logger(): - logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)-15s %(levelname)s %(message)s') - result_logger = logging.getLogger(__name__) - return result_logger - - -logger = init_logger() diff --git a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py index 608080afe..58829a864 100644 --- a/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py +++ b/eva-accession-release-automation/run_release_in_embassy/release_common_utils.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import os import signal import traceback from functools import lru_cache +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.release_metadata import get_target_mongo_instance_for_assembly from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle from ebi_eva_common_pyutils.network_utils import get_available_local_port, forward_remote_port_to_local_port from ebi_eva_common_pyutils.taxonomy import taxonomy -logger = logging.getLogger(__name__) +logger = logging_config.get_logger(__name__) def open_mongo_port_to_tempmongo(private_config_xml_file, profile, taxonomy_id, assembly, diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf index 676aa3016..12a524d7a 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.nf @@ -3,7 +3,7 @@ nextflow.enable.dsl=2 workflow { - initiate_release_status_for_assembly | copy_accessioning_collections_to_embassy | run_release_for_assembly | \ + initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \ merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \ analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \ update_sequence_names_to_ena | update_release_status_for_assembly @@ -20,7 +20,7 @@ process initiate_release_status_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 """ } @@ -35,7 +35,7 @@ process copy_accessioning_collections_to_embassy { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1 """ } @@ -50,7 +50,7 @@ process run_release_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1 """ } @@ -65,7 +65,7 @@ process merge_dbsnp_eva_release_files { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -80,7 +80,7 @@ process sort_bgzip_index_release_files { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -95,7 +95,7 @@ process validate_release_vcf_files { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1 """ } @@ -110,7 +110,7 @@ process analyze_vcf_validation_results { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1 """ } @@ -125,7 +125,7 @@ process count_rs_ids_in_release_files { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -140,7 +140,7 @@ process validate_rs_release_files { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1 """ } @@ -155,7 +155,7 @@ process update_sequence_names_to_ena { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1 """ } @@ -170,6 +170,6 @@ process update_release_status_for_assembly { script: """ export PYTHONPATH=$params.python_path - $params.python_script -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 + $params.executable.python.interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1 """ } diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py index 630f5ec92..f6c6b326c 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_assembly.py @@ -13,16 +13,17 @@ # limitations under the License. import click -import logging import sys import traceback +from ebi_eva_common_pyutils.logger import logging_config + from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo from ebi_eva_common_pyutils.command_utils import run_command_with_output -logger = logging.getLogger(__name__) +logger = logging_config.get_logger(__name__) def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession, diff --git a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py index f3c41a8c8..531177120 100644 --- a/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py +++ b/eva-accession-release-automation/run_release_in_embassy/run_release_for_species.py @@ -14,6 +14,7 @@ from argparse import ArgumentParser import os +from functools import lru_cache import yaml from ebi_eva_common_pyutils.command_utils import run_command_with_output @@ -24,12 +25,12 @@ from run_release_in_embassy.release_common_utils import get_release_folder_name -logger = logging_config.getLogger(__name__) +logger = logging_config.get_logger(__name__) def get_nextflow_params(taxonomy_id, assembly_accession, release_version): dump_dir = os.path.join(get_species_release_folder(taxonomy_id), 'dumps') - release_dir = get_release_log_file_name(taxonomy_id, assembly_accession) + release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession) config_param = os.path.join(release_dir, f'nextflow_params_{taxonomy_id}_{assembly_accession}.yaml') os.makedirs(dump_dir, exist_ok=True) yaml_data = { @@ -52,7 +53,7 @@ def get_nextflow_params(taxonomy_id, assembly_accession, release_version): def get_nextflow_config(): if 'RELEASE_NEXTFLOW_CONFIG' in os.environ and os.path.isfile(os.environ['RELEASE_NEXTFLOW_CONFIG']): - return os.environ['RELEASE_NEXTFLOW_CONFIG'] + return os.path.abspath(os.environ['RELEASE_NEXTFLOW_CONFIG']) def get_run_release_for_assembly_nextflow(): @@ -61,15 +62,21 @@ def get_run_release_for_assembly_nextflow(): def get_release_log_file_name(taxonomy_id, assembly_accession): - return f"{cfg['species-release-folder']}/{assembly_accession}/release_{taxonomy_id}_{assembly_accession}.log" + return f"{get_assembly_release_folder(taxonomy_id, assembly_accession)}/release_{taxonomy_id}_{assembly_accession}.log" +@lru_cache def get_species_release_folder(taxonomy_id): - return os.path.join(cfg["release_output"], get_release_folder_name(taxonomy_id)) + folder = os.path.join(cfg.query('release', 'release_output'), get_release_folder_name(taxonomy_id)) + os.makedirs(folder, exist_ok=True) + return folder +@lru_cache def get_assembly_release_folder(taxonomy_id, assembly_accession): - return os.path.join(get_species_release_folder(taxonomy_id), assembly_accession) + folder = os.path.join(get_species_release_folder(taxonomy_id), assembly_accession) + os.makedirs(folder, exist_ok=True) + return folder def run_release_for_species(taxonomy_id, release_assemblies, release_version, resume=False): @@ -88,11 +95,11 @@ def run_release_for_species(taxonomy_id, release_assemblies, release_version, re release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession) nextflow_config = get_nextflow_config() workflow_command = ' '.join(( - f"cd {release_dir} && " - f"{cfg.query('executable', 'nextflow')} run {workflow_file_path} " - f"-params-file {nextflow_params} " - f'-c {nextflow_config}' if nextflow_config else '' - '-resume' if resume else '' + f"cd {release_dir} &&", + f"{cfg.query('executable', 'nextflow')} run {workflow_file_path}", + f"-params-file {nextflow_params}", + f'-c {nextflow_config}' if nextflow_config else '', + '-resume' if resume else '', )) logger.info(f"Running workflow file {workflow_file_path} with the following command: " f"\n {workflow_command} \n") @@ -109,12 +116,12 @@ def load_config(*args): def main(): argparse = ArgumentParser() - argparse.add_argument("--taxonomy-id", help="ex: 9913", required=True) - argparse.add_argument("--assembly-accessions", nargs='+', help="ex: GCA_000003055.3") + argparse.add_argument("--taxonomy_id", help="ex: 9913", required=True) + argparse.add_argument("--assembly_accessions", nargs='+', help="ex: GCA_000003055.3") argparse.add_argument("--release_version", required=True) argparse.add_argument("--resume", default=False, required=False, help="Resume the nextflow pipeline for the specified taxonomy and assembly") - argparse.add_argument("--release_config-properties-file", + argparse.add_argument("--release_config_file", help="Path to the release configuration file. That will override the config specified with " "RELEASE_CONFIG variable or placed in ~/.release_config.yml.", required=False) @@ -122,7 +129,7 @@ def main(): logging_config.add_stdout_handler() - load_config(args.common_release_properties_file) + load_config(args.release_config_file) run_release_for_species(args.taxonomy_id, args.assembly_accessions, args.release_version, args.resume)