From 2b4daa51f2fb95231d651c19b99eef7bec7c5c7c Mon Sep 17 00:00:00 2001 From: Fernando Meyer Date: Mon, 26 Aug 2024 17:25:12 +0200 Subject: [PATCH] fix error loading genome_coverage file --- cami_amber/utils/load_data.py | 19 ++----------------- version.py | 2 +- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/cami_amber/utils/load_data.py b/cami_amber/utils/load_data.py index 4a01afd..fd65f47 100755 --- a/cami_amber/utils/load_data.py +++ b/cami_amber/utils/load_data.py @@ -16,7 +16,6 @@ import pandas as pd import sys import logging -import traceback import os import mimetypes import gzip @@ -67,16 +66,14 @@ def open_coverages(file_path): logging.getLogger('amber').info('Loading coverage file') coverages_pd = pd.DataFrame() try: - samples_metadata = read_metadata(file_path) + samples_metadata = read_metadata((file_path, 'coverage file')) for metadata in samples_metadata: nrows = metadata[1] - metadata[0] + 1 - df = pd.read_csv(file_path, sep='\t', comment='#', skiprows=metadata[0], nrows=nrows, header=None) - df.rename(columns=metadata[3], inplace=True) + df = pd.read_csv(file_path, names=metadata[3], sep='\t', comment='#', skiprows=metadata[0], nrows=nrows, header=None) df = df[['GENOMEID', 'COVERAGE']] df['SAMPLEID'] = metadata[2]['SAMPLEID'] coverages_pd = pd.concat([coverages_pd, df], ignore_index=True) except BaseException as e: - traceback.print_exc() logging.getLogger('amber').critical("File {} not found or malformed. {}".format(file_path, e)) exit(1) return coverages_pd @@ -113,7 +110,6 @@ def load_ncbi_info(ncbi_dir): else: taxonomy_df = pd.read_feather(os.path.join(ncbi_dir, 'nodes.amber.ft')).set_index('TAXID') except BaseException: - traceback.print_exc() logging.getLogger('amber').info('Preprocessed NCBI taxonomy file not found. Creating file {}'.format(os.path.join(ncbi_dir, 'nodes.amber.ft'))) taxonomy_df = load_ncbi_taxinfo.preprocess_ncbi_tax(ncbi_dir) taxonomy_df = taxonomy_df.astype(dtype={rank: pd.UInt32Dtype() for rank in load_ncbi_taxinfo.RANKS}) @@ -196,17 +192,6 @@ def load_binnings(samples_metadata, file_path_query): return sample_id_to_query_df -def open_query(file_path_query): - try: - samples_metadata = read_metadata(file_path_query) - sample_id_to_query_df = load_binnings(samples_metadata, file_path_query) - except BaseException as e: - traceback.print_exc() - logging.getLogger('amber').critical("File {} not found or malformed. {}".format(file_path_query, e)) - exit(1) - return sample_id_to_query_df - - def get_sample_id_to_num_genomes(sample_id_to_queries_list): sample_id_to_num_genomes = {} for sample_id in sample_id_to_queries_list: diff --git a/version.py b/version.py index 4c354e0..13ce17d 100644 --- a/version.py +++ b/version.py @@ -1 +1 @@ -__version__ = '2.0.5' +__version__ = '2.0.6'