Skip to content

Commit

Permalink
Couple of bug fixes and sync with config file
Browse files Browse the repository at this point in the history
  • Loading branch information
tcezard committed May 9, 2024
1 parent 67e91db commit d556d08
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 56 deletions.
Original file line number Diff line number Diff line change
@@ -1,25 +0,0 @@
# Copyright 2020 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys


def init_logger():
    """Configure logging to stdout and return the logger named after this module.

    The root logger is set up through ``logging.basicConfig`` with the INFO
    level and a ``timestamp level message`` line format. ``basicConfig`` does
    nothing when the root logger already has handlers, so calling this function
    repeatedly does not stack additional handlers.

    :return: the ``logging.Logger`` registered under this module's ``__name__``.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)-15s %(levelname)s %(message)s')
    return logging.getLogger(__name__)


# Module-level logger; the call to init_logger() runs when this module is imported.
logger = init_logger()
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import signal
import traceback
from functools import lru_cache

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.release_metadata import get_target_mongo_instance_for_assembly
from ebi_eva_internal_pyutils.metadata_utils import get_metadata_connection_handle
from ebi_eva_common_pyutils.network_utils import get_available_local_port, forward_remote_port_to_local_port
from ebi_eva_common_pyutils.taxonomy import taxonomy

# Module-level logger obtained from the shared logging_config helper; the
# earlier logging.getLogger assignment was immediately overwritten by this one.
logger = logging_config.get_logger(__name__)


def open_mongo_port_to_tempmongo(private_config_xml_file, profile, taxonomy_id, assembly,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
nextflow.enable.dsl=2

workflow {
initiate_release_status_for_assembly | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
initiate_release_status_for_assembly('initiate') | copy_accessioning_collections_to_embassy | run_release_for_assembly | \
merge_dbsnp_eva_release_files | sort_bgzip_index_release_files | validate_release_vcf_files | \
analyze_vcf_validation_results | count_rs_ids_in_release_files | validate_rs_release_files | \
update_sequence_names_to_ena | update_release_status_for_assembly
Expand All @@ -20,7 +20,7 @@ process initiate_release_status_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
"""
}

Expand All @@ -35,7 +35,7 @@ process copy_accessioning_collections_to_embassy {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1
"""
}

Expand All @@ -50,7 +50,7 @@ process run_release_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

Expand All @@ -65,7 +65,7 @@ process merge_dbsnp_eva_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -80,7 +80,7 @@ process sort_bgzip_index_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -95,7 +95,7 @@ process validate_release_vcf_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1
"""
}

Expand All @@ -110,7 +110,7 @@ process analyze_vcf_validation_results {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1
"""
}

Expand All @@ -125,7 +125,7 @@ process count_rs_ids_in_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -140,7 +140,7 @@ process validate_rs_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -155,7 +155,7 @@ process update_sequence_names_to_ena {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1
"""
}

Expand All @@ -170,6 +170,6 @@ process update_release_status_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.python_script -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
$params.executable.python.interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
"""
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@
# limitations under the License.

import click
import logging
import sys
import traceback

from ebi_eva_common_pyutils.logger import logging_config

from run_release_in_embassy.create_release_properties_file import create_release_properties_file_for_assembly
from run_release_in_embassy.release_common_utils import open_mongo_port_to_tempmongo, close_mongo_port_to_tempmongo
from ebi_eva_common_pyutils.command_utils import run_command_with_output


# Module-level logger obtained from the shared logging_config helper; the
# earlier logging.getLogger assignment was immediately overwritten by this one.
logger = logging_config.get_logger(__name__)


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from argparse import ArgumentParser

import os
from functools import lru_cache

import yaml
from ebi_eva_common_pyutils.command_utils import run_command_with_output
Expand All @@ -24,12 +25,12 @@
from run_release_in_embassy.release_common_utils import get_release_folder_name


# Module-level logger. Only get_logger is called here: the getLogger spelling
# that used to precede it is the call this change replaces.
# NOTE(review): presumably logging_config exposes no getLogger attribute - confirm.
logger = logging_config.get_logger(__name__)


def get_nextflow_params(taxonomy_id, assembly_accession, release_version):
dump_dir = os.path.join(get_species_release_folder(taxonomy_id), 'dumps')
release_dir = get_release_log_file_name(taxonomy_id, assembly_accession)
release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession)
config_param = os.path.join(release_dir, f'nextflow_params_{taxonomy_id}_{assembly_accession}.yaml')
os.makedirs(dump_dir, exist_ok=True)
yaml_data = {
Expand All @@ -52,7 +53,7 @@ def get_nextflow_params(taxonomy_id, assembly_accession, release_version):

def get_nextflow_config():
    """Return the absolute path of the optional Nextflow configuration file.

    The path is read from the ``RELEASE_NEXTFLOW_CONFIG`` environment variable
    and is only honoured when it names an existing file.

    :return: absolute path to the configuration file, or ``None`` when the
             variable is unset, empty, or does not point to a file.
    """
    config_path = os.environ.get('RELEASE_NEXTFLOW_CONFIG')
    if config_path and os.path.isfile(config_path):
        # The workflow command starts with a ``cd`` into the release folder,
        # so a path relative to the current directory must be made absolute.
        return os.path.abspath(config_path)
    return None


def get_run_release_for_assembly_nextflow():
Expand All @@ -61,15 +62,21 @@ def get_run_release_for_assembly_nextflow():


def get_release_log_file_name(taxonomy_id, assembly_accession):
    """Return the path of the release log file for one assembly of a species.

    The file is named ``release_<taxonomy_id>_<assembly_accession>.log`` and is
    placed in the folder returned by ``get_assembly_release_folder``, so the
    log location always agrees with the folder the other helpers use.

    :param taxonomy_id: taxonomy identifier of the species being released.
    :param assembly_accession: accession of the assembly being released.
    :return: path to the log file as a string.
    """
    return os.path.join(
        get_assembly_release_folder(taxonomy_id, assembly_accession),
        f"release_{taxonomy_id}_{assembly_accession}.log",
    )


@lru_cache
def get_species_release_folder(taxonomy_id):
    """Return the release folder for a species, creating it if needed.

    The folder is the value stored under ``release`` / ``release_output`` in
    ``cfg`` joined with the name given by ``get_release_folder_name``. The
    result is cached per ``taxonomy_id``, so the directory is only created on
    the first call for a given species.

    :param taxonomy_id: taxonomy identifier of the species being released.
    :return: path to the species release folder.
    """
    folder = os.path.join(cfg.query('release', 'release_output'), get_release_folder_name(taxonomy_id))
    os.makedirs(folder, exist_ok=True)
    return folder


@lru_cache
def get_assembly_release_folder(taxonomy_id, assembly_accession):
    """Return the release folder for one assembly of a species, creating it if needed.

    The folder is a sub-directory of ``get_species_release_folder(taxonomy_id)``
    named after the assembly accession. The result is cached per argument
    pair, so the directory is only created on the first call.

    :param taxonomy_id: taxonomy identifier of the species being released.
    :param assembly_accession: accession of the assembly being released.
    :return: path to the assembly release folder.
    """
    folder = os.path.join(get_species_release_folder(taxonomy_id), assembly_accession)
    os.makedirs(folder, exist_ok=True)
    return folder


def run_release_for_species(taxonomy_id, release_assemblies, release_version, resume=False):
Expand All @@ -88,11 +95,11 @@ def run_release_for_species(taxonomy_id, release_assemblies, release_version, re
release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession)
nextflow_config = get_nextflow_config()
workflow_command = ' '.join((
f"cd {release_dir} && "
f"{cfg.query('executable', 'nextflow')} run {workflow_file_path} "
f"-params-file {nextflow_params} "
f'-c {nextflow_config}' if nextflow_config else ''
'-resume' if resume else ''
f"cd {release_dir} &&",
f"{cfg.query('executable', 'nextflow')} run {workflow_file_path}",
f"-params-file {nextflow_params}",
f'-c {nextflow_config}' if nextflow_config else '',
'-resume' if resume else '',
))
logger.info(f"Running workflow file {workflow_file_path} with the following command: "
f"\n {workflow_command} \n")
Expand All @@ -109,20 +116,20 @@ def load_config(*args):

def _parse_resume_flag(value):
    """Convert the optional value passed to ``--resume`` into a boolean.

    :param value: text supplied on the command line after ``--resume``.
    :return: ``True`` or ``False``.
    :raises ValueError: when the text is not a recognised boolean spelling;
                        argparse reports this as a usage error.
    """
    normalised = str(value).strip().lower()
    if normalised in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if normalised in ('false', 'f', 'no', 'n', '0', 'off'):
        return False
    raise ValueError(f'Cannot interpret {value!r} as a boolean')


def main():
    """Parse the command line, load the release configuration and run the release.

    Options:
      --taxonomy_id          taxonomy of the species to release (required).
      --assembly_accessions  one or more assembly accessions.
      --release_version      release version (required).
      --resume               resume the Nextflow pipeline. May be given bare
                             (``--resume``) or with an explicit value
                             (``--resume true`` / ``--resume false``).
      --release_config_file  optional path to the release configuration file.
    """
    parser = ArgumentParser()
    parser.add_argument("--taxonomy_id", help="ex: 9913", required=True)
    parser.add_argument("--assembly_accessions", nargs='+', help="ex: GCA_000003055.3")
    parser.add_argument("--release_version", required=True)
    # Without a converter the value is stored as a string, so "--resume False"
    # would be truthy. nargs='?' keeps "--resume <value>" working and also
    # allows the bare flag.
    parser.add_argument("--resume", nargs='?', const=True, default=False, type=_parse_resume_flag, required=False,
                        help="Resume the nextflow pipeline for the specified taxonomy and assembly")
    parser.add_argument("--release_config_file",
                        help="Path to the release configuration file. That will override the config specified with "
                             "RELEASE_CONFIG variable or placed in ~/.release_config.yml.",
                        required=False)
    args = parser.parse_args()

    logging_config.add_stdout_handler()

    # Must read the attribute argparse derives from --release_config_file.
    load_config(args.release_config_file)

    run_release_for_species(args.taxonomy_id, args.assembly_accessions, args.release_version, args.resume)

Expand Down

0 comments on commit d556d08

Please sign in to comment.