From b0f099c1396c8b3e367d1e7fd1691d2779be7b12 Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Fri, 19 Apr 2024 14:13:36 +0200 Subject: [PATCH 1/6] adding easyconfigs: CheckM2-1.0.2-foss-2022a.eb --- .../c/CheckM2/CheckM2-1.0.2-foss-2022a.eb | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb new file mode 100644 index 00000000000..bb17e6c2713 --- /dev/null +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb @@ -0,0 +1,48 @@ +easyblock = 'PythonBundle' + +name = 'CheckM2' +version = '1.0.2' + +homepage = 'https://github.com/chklovski/CheckM2/' +description = "Assessing the quality of metagenome-derived genome bins using machine learning" + +toolchain = {'name': 'foss', 'version': '2022a'} + +builddependencies = [('CMake', '3.24.3')] +dependencies = [ + ('Python', '3.10.4'), + ('SciPy-bundle', '2022.05'), + ('DIAMOND', '2.1.0'), + ('TensorFlow', '2.11.0'), + ('prodigal', '2.6.3'), + ('h5py', '3.7.0'), + ('tqdm', '4.64.0'), +] + +exts_list = [ + ('scikit-learn', '0.23.2', { + 'modulename': 'sklearn', + 'checksums': ['20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480'], + }), + ('lightgbm', '3.2.1', { + 'checksums': ['bd98e3b501b4c24dc127f4ad93e467f42923fe3eefa99e143b5b93158f024395'], + }), + (name, version, { + 'source_urls': ['https://github.com/chklovski/CheckM2/archive/'], + 'sources': ['%(version)s.tar.gz'], + 'checksums': ['9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'], + }), +] + +postinstallcmds = [ + 'sed -i "s/np.float/float/g" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/predictQuality.py', + # '%(installdir)s/bin/checkm2 database --download --path %(installdir)s', + # 'export CHECKM2DB="$EBROOTCHECKM2/CheckM2_database/uniref100.KO.1.dmnd"', +] + +# modextrapaths = {'CHECKM2DB': 'CheckM2_database/uniref100.KO.1.dmnd', 'PYTHONPATH': ''} + +use_pip = True +sanity_pip_check = True + +moduleclass = 'bio' From e9a8f9daca49db349d23b775bd9583b5cc13ea74 Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Wed, 24 Apr 2024 14:33:16 +0200 Subject: [PATCH 2/6] add 2022b version and patch for fileManager.py --- .../c/CheckM2/CheckM2-1.0.2-foss-2022a.eb | 48 --- .../c/CheckM2/CheckM2-1.0.2-foss-2022b.eb | 67 +++ ...M2-1.0.2_fileManager.py-database-fix.patch | 405 ++++++++++++++++++ 3 files changed, 472 insertions(+), 48 deletions(-) delete mode 100644 easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb create mode 100644 easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb create mode 100644 easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb deleted file mode 100644 index bb17e6c2713..00000000000 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022a.eb +++ /dev/null @@ -1,48 +0,0 @@ -easyblock = 'PythonBundle' - -name = 'CheckM2' -version = '1.0.2' - -homepage = 'https://github.com/chklovski/CheckM2/' -description = "Assessing the quality of metagenome-derived genome bins using machine learning" - -toolchain = {'name': 'foss', 'version': '2022a'} - -builddependencies = [('CMake', '3.24.3')] -dependencies = [ - ('Python', '3.10.4'), - ('SciPy-bundle', '2022.05'), - ('DIAMOND', '2.1.0'), - ('TensorFlow', '2.11.0'), - ('prodigal', '2.6.3'), - ('h5py', '3.7.0'), - ('tqdm', '4.64.0'), -] - -exts_list = [ - ('scikit-learn', '0.23.2', { - 'modulename': 'sklearn', - 'checksums': ['20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480'], - }), - ('lightgbm', '3.2.1', { - 'checksums': ['bd98e3b501b4c24dc127f4ad93e467f42923fe3eefa99e143b5b93158f024395'], - }), - (name, version, { - 'source_urls': ['https://github.com/chklovski/CheckM2/archive/'], - 'sources': ['%(version)s.tar.gz'], - 'checksums': ['9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'], - }), -] - -postinstallcmds = [ - 'sed -i "s/np.float/float/g" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/predictQuality.py', - # '%(installdir)s/bin/checkm2 database --download --path %(installdir)s', - # 'export CHECKM2DB="$EBROOTCHECKM2/CheckM2_database/uniref100.KO.1.dmnd"', -] - -# modextrapaths = {'CHECKM2DB': 'CheckM2_database/uniref100.KO.1.dmnd', 'PYTHONPATH': ''} - -use_pip = True -sanity_pip_check = True - -moduleclass = 'bio' diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb new file mode 100644 index 00000000000..f1401f4dcdb --- /dev/null +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb @@ -0,0 +1,67 @@ +easyblock = 'PythonBundle' + +name = 'CheckM2' +version = '1.0.2' + +homepage = 'https://github.com/chklovski/CheckM2/' +description = "Assessing the quality of metagenome-derived genome bins using machine learning" + +toolchain = {'name': 'foss', 'version': '2022b'} #12.2.0 + +builddependencies = [('CMake', '3.24.3')] +dependencies = [ + ('Python', '3.10.8'), + ('SciPy-bundle', '2023.02'), + ('DIAMOND', '2.1.8'), + ('TensorFlow', '2.13.0'), + ('prodigal', '2.6.3'), + ('h5py', '3.8.0'), + ('tqdm', '4.64.1'), +] + +exts_list = [ + ('scikit-learn', '0.23.2', { + 'modulename': 'sklearn', + 'checksums': ['20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480'], + }), + ('lightgbm', '3.2.1', { + 'checksums': ['bd98e3b501b4c24dc127f4ad93e467f42923fe3eefa99e143b5b93158f024395'], + }), + (name, version, { + 'patches': ['%(name)s-%(version)s_fileManager.py-database-fix.patch'], + 'source_urls': ['https://github.com/chklovski/CheckM2/archive/'], + 'sources': ['%(version)s.tar.gz'], + 'checksums': [ + {'1.0.2.tar.gz': '9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'}, + {'CheckM2-1.0.2_fileManager.py-database-fix.patch': + '72f3f7cfebe7a8f550eb165b91bdcfb225ef71cedde2433f28a3e34dfca024aa'}, + ], + }), +] + +postinstallcmds = [ + # np.float is depreciated in newer numpy + 'sed -i "s/np.float/float/g" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/predictQuality.py', + # update DB_LOCATION_DEFINITION in defaultValues.py to env CHECKM2DB + 'sed -i "31d" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py', + "sed -i '30 a\ DB_LOCATION_DEFINITION = os.environ.get(\"CHECKM2DB\", \"Not Set\")' " + + "%(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py", + # convert defaultsValues.py to dos style + "unix2dos %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py", +] + +modloadmsg = """You need download a diamond database now and setup a path to this db: +First you need to setup $CHECKM2DB as: +$ export CHECKM2DB="path/to/database/CheckM2_database/uniref100.KO.1.dmnd" +Next, download the database (/CheckM2_database/uniref100.KO.1.dmnd will be created): +$ checkm2 database --download --path path/to/database +You can check path to the database by: +$ checkm2 database --current +You can either test if everything is setup properly by: +$ checkm2 testrun +""" + +use_pip = True +sanity_pip_check = True + +moduleclass = 'bio' diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch new file mode 100644 index 00000000000..3f460c3b6a3 --- /dev/null +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch @@ -0,0 +1,405 @@ +--- checkm2/fileManager.py.orig 2024-04-23 14:17:24.825732872 +0200 ++++ checkm2/fileManager.py 2024-04-24 11:23:05.737180000 +0200 +@@ -1,228 +1,173 @@ +-import os +-import errno +-import sys +-import logging +-import shutil +-import requests +-import json +-import gzip +-import tempfile +- +-from checkm2 import versionControl +-from checkm2.defaultValues import DefaultValues +-from checkm2 import zenodo_backpack +- +-class DiamondDB: +- def __init__(self): +- if DefaultValues.DB_VAR in os.environ: +- self.DATABASE_DIR = os.environ[DefaultValues.DB_VAR] +- #Check if it's still there and if not, unset variable +- if not os.path.exists(self.DATABASE_DIR): +- logging.warning('Database not found using the environmental variable: {}. Please fix your $PATH. Using internal database path instead.'.format(DefaultValues.DB_VAR)) +- +- diamond_definition = self.__get_db_file() +- +- if diamond_definition['DBPATH'] == 'Not Set': +- self.DATABASE_DIR = 'Not Set' +- else: +- self.DATABASE_DIR = diamond_definition['DBPATH'] +- else: +- diamond_definition = self.__get_db_file() +- +- if diamond_definition['DBPATH'] == 'Not Set': +- self.DATABASE_DIR = 'Not Set' +- else: +- self.DATABASE_DIR = diamond_definition['DBPATH'] +- +- +- def __get_db_file(self): +- diamond_location = DefaultValues.DB_LOCATION_DEFINITION +- try: +- with open(diamond_location) as f: +- diamond_definition = json.load(f) +- return diamond_definition +- except: +- logging.warning('Could not open DIAMOND location definition file. Creating new file.') +- db_ref_file = {"Type": "DIAMONDDB", "DBPATH": "Not Set"} +- with open(diamond_location, 'w') as dl: +- json.dump(db_ref_file, dl) +- try: +- with open(diamond_location) as f: +- diamond_definition = json.load(f) +- return diamond_definition +- except Exception as e: +- logging.error('Could not create new file: {}'.format(e)) +- sys.exit(1) +- +- +- def get_DB_location(self): +- if self.DATABASE_DIR == 'Not Set': +- logging.error('DIAMOND database not found. Please download database using ') +- sys.exit(1) +- else: +- return self.DATABASE_DIR +- +- def set_DB_location(self, provided_location): +- logging.info('Checking provided DIAMOND database location') +- if versionControl.VersionControl().checksum_version_validate_DIAMOND(provided_location): +- #great - let's set it +- diamond_definition = self.__get_db_file() +- +- diamond_definition['DBPATH'] = os.path.abspath(provided_location) +- with open(DefaultValues.DB_LOCATION_DEFINITION, 'w') as dd: +- json.dump(diamond_definition, dd) +- +- logging.info("Database check successful! Database path successfully added.") +- +- else: +- logging.error("Checksum in CheckM2 reference doesn't match provided database. Please check your files.") +- sys.exit(1) +- +- +- def download_database(self, download_location): +- +- '''Uses a DOI link to automatically download, unpack and verify from zenodo.org''' +- +- logging.info("Command: Download database. Checking internal path information.") +- +- diamond_location = DefaultValues.DB_LOCATION_DEFINITION +- +- try: +- with open(diamond_location) as f: +- diamond_definition = json.load(f) +- except: +- logging.warning('Could not open DIAMOND location definition file. Creating new file.') +- x = {"Type": "DIAMONDDB", "DBPATH": "Not Set"} +- with open(diamond_location, 'w') as dl: +- json.dump(x, dl) +- try: +- with open(diamond_location) as f: +- diamond_definition = json.load(f) +- except Exception as e: +- logging.error('Could not create new file: {}'.format(e)) +- sys.exit(1) +- if diamond_definition['DBPATH'] != 'Not Set': +- logging.warning('DIAMOND database found at {}. Overwriting previous database.'.format(diamond_definition['DBPATH'])) +- +- +- make_sure_path_exists(os.path.join(download_location, 'CheckM2_database')) +- +- backpack_downloader = zenodo_backpack.zenodo_backpack_downloader('INFO') +- highest_compatible_version, DOI = versionControl.VersionControl().return_highest_compatible_DB_version() +- +- +- diamond_loc_final = os.path.join(download_location, 'CheckM2_database', 'uniref100.KO.1.dmnd') +- +- if download_location is not None: +- #check we have writing permission +- try: +- os.makedirs(download_location, exist_ok=True) +- with tempfile.TemporaryDirectory(dir=download_location): +- pass +- except OSError: +- logging.error("You do not appear to have permission to write to {}. Please choose a different directory" +- .format(download_location)) +- sys.exit(1) +- +- backpack_downloader.download_and_extract(download_location, DOI, progress_bar=True, no_check_version=False) +- +- diamond_definition['DBPATH'] = os.path.abspath(diamond_loc_final) +- +- with open(diamond_location, 'w') as dd: +- json.dump(diamond_definition, dd) +- +- +- else: +- logging.info('Failed to determine download location') +- sys.exit(1) +- +- #do checksum +- if versionControl.VersionControl().checksum_version_validate_DIAMOND(): +- logging.info('Diamond DATABASE downloaded successfully! Consider running to verify everything works.') +- else: +- logging.error('Could not verify successfull installation of reference database.') +- +- +- def update_database(self): +- pass +- +-def update_checkm2(): +- pass +- +- +-def check_empty_dir(input_dir, overwrite=False): +- """Check the the specified directory is empty and create it if necessary.""" +- if not os.path.exists(input_dir): +- make_sure_path_exists(input_dir) +- else: +- # check if directory is empty +- files = os.listdir(input_dir) +- if len(files) != 0: +- if overwrite: +- for root, dirs, files in os.walk(input_dir): +- for f in files: +- os.unlink(os.path.join(root, f)) +- for d in dirs: +- shutil.rmtree(os.path.join(root, d)) +- else: +- logging.error('Output directory must be empty: ' + input_dir + ' Use --force if you wish to overwrite ' +- 'existing directory. \n') +- sys.exit(1) +- +-def verify_prodigal_output(prodigal_dir, ttable_dict, bin_extension): +- """Check the prodigal process was successful, matches internal list of genomes, and return list of protein files.""" +- if not os.path.exists(prodigal_dir): +- # check if directory is empty +- logging.error('Error: Protein directory {} does not exist: ' + prodigal_dir + '\n') +- sys.exit(1) +- +- else: +- files = os.listdir(prodigal_dir) +- #check if files were generated +- if len(files) == 0: +- logging.error('Error: No protein files were generated in {}'.format(prodigal_dir)) +- sys.exit(1) +- +- prodigal_files = [] +- +- for f in files: +- if f.endswith('.faa'): +- protein_file = os.path.join(prodigal_dir, f) +- if os.stat(protein_file).st_size == 0: +- logging.warning("Skipping protein file {} as it was empty.".format(protein_file)) +- del ttable_dict[f[:-4]] +- elif os.path.splitext(os.path.basename(f))[0] not in ttable_dict: +- logging.warning("Skipping protein file {} as it was not generated by Checkm2.".format(protein_file)) +- else: +- prodigal_files.append(protein_file) +- if len(prodigal_files) == len(ttable_dict.keys()): +- return prodigal_files, ttable_dict +- else: +- logging.error('Error: List of protein files does not match internal reference.') +- sys.exit(1) +- +-def check_if_file_exists(inputFile): +- """Check if file exists.""" +- if not os.path.exists(inputFile): +- logging.error('File does not exist: ' + inputFile + '\n') +- sys.exit(1) +- +- +-def check_if_dir_exists(inputDir): +- """Check if directory exists.""" +- if not os.path.exists(inputDir): +- logging.error('Input directory does not exists: ' + inputDir + '\n') +- sys.exit(1) +- +- +-def make_sure_path_exists(path): +- """Create directory if it does not exist.""" +- if not path: +- return +- +- try: +- os.makedirs(path) +- except OSError as exception: +- if exception.errno != errno.EEXIST: +- logging.error('Specified path does not exist: ' + path + '\n') +- sys.exit(1) ++import os ++import errno ++import sys ++import logging ++import shutil ++import requests ++import json ++import gzip ++import tempfile ++ ++from checkm2 import versionControl ++from checkm2.defaultValues import DefaultValues ++from checkm2 import zenodo_backpack ++ ++class DiamondDB: ++ def __init__(self): ++ if DefaultValues.DB_VAR in os.environ: ++ self.DATABASE_DIR = os.environ[DefaultValues.DB_VAR] ++ #Check if it's still there and if not, unset variable ++ if not os.path.exists(self.DATABASE_DIR): ++ logging.warning('Database not found using the environmental variable: {}. Please fix your $PATH. Using internal database path instead.'.format(DefaultValues.DB_VAR)) ++ ++ diamond_definition = self.__get_db_file() ++ ++ if diamond_definition == 'Not Set': ++ self.DATABASE_DIR = 'Not Set' ++ else: ++ self.DATABASE_DIR = diamond_definition ++ else: ++ diamond_definition = self.__get_db_file() ++ ++ if diamond_definition == 'Not Set': ++ self.DATABASE_DIR = 'Not Set' ++ else: ++ self.DATABASE_DIR = diamond_definition ++ ++ ++ def __get_db_file(self): ++ return DefaultValues.DB_LOCATION_DEFINITION ++ ++ def get_DB_location(self): ++ if self.DATABASE_DIR == 'Not Set': ++ logging.error( ++ 'DIAMOND database not found. Please download database using $ checkm2 database --download --path /path/to/database ' ++ + ',but FIRST set CHECKM2DB to PATH by $ export CHECKM2DB=\"/path/to/database/CheckM2_database/uniref100.KO.1.dmnd\"' ++ ) ++ sys.exit(1) ++ else: ++ return self.DATABASE_DIR ++ ++ def set_DB_location(self, provided_location): ++ logging.info("Set path to database location by: $ export CHECKM2DB=path/to/database/CheckM2_database/uniref100.KO.1.dmnd") ++ ++ def download_database(self, download_location): ++ ++ '''Uses a DOI link to automatically download, unpack and verify from zenodo.org''' ++ ++ logging.info("Command: Download database. Checking internal path information.") ++ ++ diamond_location = DefaultValues.DB_LOCATION_DEFINITION ++ ++ make_sure_path_exists(os.path.join(download_location, 'CheckM2_database')) ++ ++ backpack_downloader = zenodo_backpack.zenodo_backpack_downloader('INFO') ++ highest_compatible_version, DOI = versionControl.VersionControl().return_highest_compatible_DB_version() ++ ++ if download_location is not None: ++ #check we have writing permission ++ try: ++ os.makedirs(download_location, exist_ok=True) ++ with tempfile.TemporaryDirectory(dir=download_location): ++ pass ++ except OSError: ++ logging.error("You do not appear to have permission to write to {}. Please choose a different directory" ++ .format(download_location)) ++ sys.exit(1) ++ ++ backpack_downloader.download_and_extract(download_location, DOI, progress_bar=True, no_check_version=False) ++ ++ else: ++ logging.info('Failed to determine download location') ++ sys.exit(1) ++ ++ #do checksum ++ if versionControl.VersionControl().checksum_version_validate_DIAMOND(): ++ logging.info('Diamond DATABASE downloaded successfully! Consider running to verify everything works.') ++ else: ++ logging.error('Could not verify successfull installation of reference database.') ++ ++ ++ def update_database(self): ++ pass ++ ++def update_checkm2(): ++ pass ++ ++ ++def check_empty_dir(input_dir, overwrite=False): ++ """Check the the specified directory is empty and create it if necessary.""" ++ if not os.path.exists(input_dir): ++ make_sure_path_exists(input_dir) ++ else: ++ # check if directory is empty ++ files = os.listdir(input_dir) ++ if len(files) != 0: ++ if overwrite: ++ for root, dirs, files in os.walk(input_dir): ++ for f in files: ++ os.unlink(os.path.join(root, f)) ++ for d in dirs: ++ shutil.rmtree(os.path.join(root, d)) ++ else: ++ logging.error('Output directory must be empty: ' + input_dir + ' Use --force if you wish to overwrite ' ++ 'existing directory. \n') ++ sys.exit(1) ++ ++def verify_prodigal_output(prodigal_dir, ttable_dict, bin_extension): ++ """Check the prodigal process was successful, matches internal list of genomes, and return list of protein files.""" ++ if not os.path.exists(prodigal_dir): ++ # check if directory is empty ++ logging.error('Error: Protein directory {} does not exist: ' + prodigal_dir + '\n') ++ sys.exit(1) ++ ++ else: ++ files = os.listdir(prodigal_dir) ++ #check if files were generated ++ if len(files) == 0: ++ logging.error('Error: No protein files were generated in {}'.format(prodigal_dir)) ++ sys.exit(1) ++ ++ prodigal_files = [] ++ ++ for f in files: ++ if f.endswith('.faa'): ++ protein_file = os.path.join(prodigal_dir, f) ++ if os.stat(protein_file).st_size == 0: ++ logging.warning("Skipping protein file {} as it was empty.".format(protein_file)) ++ del ttable_dict[f[:-4]] ++ elif os.path.splitext(os.path.basename(f))[0] not in ttable_dict: ++ logging.warning("Skipping protein file {} as it was not generated by Checkm2.".format(protein_file)) ++ else: ++ prodigal_files.append(protein_file) ++ if len(prodigal_files) == len(ttable_dict.keys()): ++ return prodigal_files, ttable_dict ++ else: ++ logging.error('Error: List of protein files does not match internal reference.') ++ sys.exit(1) ++ ++def check_if_file_exists(inputFile): ++ """Check if file exists.""" ++ if not os.path.exists(inputFile): ++ logging.error('File does not exist: ' + inputFile + '\n') ++ sys.exit(1) ++ ++ ++def check_if_dir_exists(inputDir): ++ """Check if directory exists.""" ++ if not os.path.exists(inputDir): ++ logging.error('Input directory does not exists: ' + inputDir + '\n') ++ sys.exit(1) ++ ++ ++def make_sure_path_exists(path): ++ """Create directory if it does not exist.""" ++ if not path: ++ return ++ ++ try: ++ os.makedirs(path) ++ except OSError as exception: ++ if exception.errno != errno.EEXIST: ++ logging.error('Specified path does not exist: ' + path + '\n') ++ sys.exit(1) +\ No newline at end of file From 5463882745652a9fe760a9ead17df8bf37ba508a Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Wed, 24 Apr 2024 14:37:04 +0200 Subject: [PATCH 3/6] clenup easyconfig --- easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb index f1401f4dcdb..c77a19c2483 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb @@ -6,7 +6,7 @@ version = '1.0.2' homepage = 'https://github.com/chklovski/CheckM2/' description = "Assessing the quality of metagenome-derived genome bins using machine learning" -toolchain = {'name': 'foss', 'version': '2022b'} #12.2.0 +toolchain = {'name': 'foss', 'version': '2022b'} builddependencies = [('CMake', '3.24.3')] dependencies = [ From 50be12d692c3db45779eca0cad00e1f50a306663 Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Wed, 24 Apr 2024 16:13:17 +0200 Subject: [PATCH 4/6] update checksum for checkm2 --- easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb index c77a19c2483..bd35a141653 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb @@ -34,7 +34,7 @@ exts_list = [ 'checksums': [ {'1.0.2.tar.gz': '9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'}, {'CheckM2-1.0.2_fileManager.py-database-fix.patch': - '72f3f7cfebe7a8f550eb165b91bdcfb225ef71cedde2433f28a3e34dfca024aa'}, + '67a58b4169a013eac6587af1cc12cdc8297ea2685046af780096cec5b88b7001'}, ], }), ] From 3b4c741e007fb7b092a10011763615348d3aef7e Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Thu, 9 May 2024 12:32:54 +0200 Subject: [PATCH 5/6] update sed in postinstallcmds and update description in a patch --- .../easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb | 9 +++------ .../CheckM2-1.0.2_fileManager.py-database-fix.patch | 4 ++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb index bd35a141653..2dab4ab6a7c 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb @@ -34,7 +34,7 @@ exts_list = [ 'checksums': [ {'1.0.2.tar.gz': '9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'}, {'CheckM2-1.0.2_fileManager.py-database-fix.patch': - '67a58b4169a013eac6587af1cc12cdc8297ea2685046af780096cec5b88b7001'}, + '5f0124ed41a25587b4f98abaacc8005b61c13778616fe7ecf0c9b12f3d034cfa'}, ], }), ] @@ -43,11 +43,8 @@ postinstallcmds = [ # np.float is depreciated in newer numpy 'sed -i "s/np.float/float/g" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/predictQuality.py', # update DB_LOCATION_DEFINITION in defaultValues.py to env CHECKM2DB - 'sed -i "31d" %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py', - "sed -i '30 a\ DB_LOCATION_DEFINITION = os.environ.get(\"CHECKM2DB\", \"Not Set\")' " + - "%(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py", - # convert defaultsValues.py to dos style - "unix2dos %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2/defaultValues.py", + "cd %(installdir)s/lib/python%(pyshortver)s/site-packages/checkm2 && " + r"sed -i 's/\(DB_LOCATION_DEFINITION\) = .*/\1 = os.environ.get(\"CHECKM2DB\", \"Not Set\")/' defaultValues.py", ] modloadmsg = """You need download a diamond database now and setup a path to this db: diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch index 3f460c3b6a3..54f0f72ee46 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch @@ -1,3 +1,7 @@ +Author: Pavel Tomanek (Inuits) +This patch changes the way a path to the diamond database is set. It used to be stored in install dir in +diamond_path.json, but since this is unmodifiable file for user, the path is stored in env variable CHECKM2DB. +The patch needs to change whole file - there was a problem with dos style endings (CRLF). --- checkm2/fileManager.py.orig 2024-04-23 14:17:24.825732872 +0200 +++ checkm2/fileManager.py 2024-04-24 11:23:05.737180000 +0200 @@ -1,228 +1,173 @@ From 2b5f03e2a50c549b4010f0478b2e59e2e266e57f Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 30 May 2024 22:33:37 +0200 Subject: [PATCH 6/6] fix patch for CheckM2 --- .../c/CheckM2/CheckM2-1.0.2-foss-2022b.eb | 2 +- ...M2-1.0.2_fileManager.py-database-fix.patch | 399 +++--------------- 2 files changed, 59 insertions(+), 342 deletions(-) diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb index 2dab4ab6a7c..9f834b1a38f 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2-foss-2022b.eb @@ -34,7 +34,7 @@ exts_list = [ 'checksums': [ {'1.0.2.tar.gz': '9d3129e4d0b53acc38519a259cc1e20a215dff0cbce51cef874545ca2fff005a'}, {'CheckM2-1.0.2_fileManager.py-database-fix.patch': - '5f0124ed41a25587b4f98abaacc8005b61c13778616fe7ecf0c9b12f3d034cfa'}, + '953f0eeef49ea537c0cb97c173a2488c29f09b58cd10800c60078b436a7ea2c7'}, ], }), ] diff --git a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch index 54f0f72ee46..5c64bd20738 100644 --- a/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch +++ b/easybuild/easyconfigs/c/CheckM2/CheckM2-1.0.2_fileManager.py-database-fix.patch @@ -1,48 +1,32 @@ -Author: Pavel Tomanek (Inuits) +Author: Pavel Tomanek (Inuits) + Kenneth Hoste (HPC-UGent) This patch changes the way a path to the diamond database is set. It used to be stored in install dir in diamond_path.json, but since this is unmodifiable file for user, the path is stored in env variable CHECKM2DB. The patch needs to change whole file - there was a problem with dos style endings (CRLF). ---- checkm2/fileManager.py.orig 2024-04-23 14:17:24.825732872 +0200 -+++ checkm2/fileManager.py 2024-04-24 11:23:05.737180000 +0200 -@@ -1,228 +1,173 @@ --import os --import errno --import sys --import logging --import shutil --import requests --import json --import gzip --import tempfile -- --from checkm2 import versionControl --from checkm2.defaultValues import DefaultValues --from checkm2 import zenodo_backpack -- --class DiamondDB: -- def __init__(self): -- if DefaultValues.DB_VAR in os.environ: -- self.DATABASE_DIR = os.environ[DefaultValues.DB_VAR] -- #Check if it's still there and if not, unset variable -- if not os.path.exists(self.DATABASE_DIR): -- logging.warning('Database not found using the environmental variable: {}. Please fix your $PATH. Using internal database path instead.'.format(DefaultValues.DB_VAR)) -- -- diamond_definition = self.__get_db_file() -- +diff -ruZ CheckM2-1.0.2.orig/checkm2/fileManager.py CheckM2-1.0.2/checkm2/fileManager.py +--- CheckM2-1.0.2.orig/checkm2/fileManager.py 2023-05-19 01:56:46.000000000 +0200 ++++ CheckM2-1.0.2/checkm2/fileManager.py 2024-05-30 22:13:45.230761282 +0200 +@@ -22,62 +22,34 @@ + + diamond_definition = self.__get_db_file() + - if diamond_definition['DBPATH'] == 'Not Set': -- self.DATABASE_DIR = 'Not Set' -- else: ++ if diamond_definition == 'Not Set': + self.DATABASE_DIR = 'Not Set' + else: - self.DATABASE_DIR = diamond_definition['DBPATH'] -- else: -- diamond_definition = self.__get_db_file() -- ++ self.DATABASE_DIR = diamond_definition + else: + diamond_definition = self.__get_db_file() + - if diamond_definition['DBPATH'] == 'Not Set': -- self.DATABASE_DIR = 'Not Set' -- else: ++ if diamond_definition == 'Not Set': + self.DATABASE_DIR = 'Not Set' + else: - self.DATABASE_DIR = diamond_definition['DBPATH'] -- -- -- def __get_db_file(self): ++ self.DATABASE_DIR = diamond_definition + + + def __get_db_file(self): - diamond_location = DefaultValues.DB_LOCATION_DEFINITION - try: - with open(diamond_location) as f: @@ -61,15 +45,20 @@ The patch needs to change whole file - there was a problem with dos style ending - logging.error('Could not create new file: {}'.format(e)) - sys.exit(1) - -- -- def get_DB_location(self): -- if self.DATABASE_DIR == 'Not Set': ++ return DefaultValues.DB_LOCATION_DEFINITION + + def get_DB_location(self): + if self.DATABASE_DIR == 'Not Set': - logging.error('DIAMOND database not found. Please download database using ') -- sys.exit(1) -- else: -- return self.DATABASE_DIR -- -- def set_DB_location(self, provided_location): ++ logging.error( ++ 'DIAMOND database not found. Please download database using $ checkm2 database --download --path /path/to/database ' ++ + ',but FIRST set CHECKM2DB to PATH by $ export CHECKM2DB=\"/path/to/database/CheckM2_database/uniref100.KO.1.dmnd\"' ++ ) + sys.exit(1) + else: + return self.DATABASE_DIR + + def set_DB_location(self, provided_location): - logging.info('Checking provided DIAMOND database location') - if versionControl.VersionControl().checksum_version_validate_DIAMOND(provided_location): - #great - let's set it @@ -85,15 +74,14 @@ The patch needs to change whole file - there was a problem with dos style ending - logging.error("Checksum in CheckM2 reference doesn't match provided database. Please check your files.") - sys.exit(1) - -- -- def download_database(self, download_location): -- -- '''Uses a DOI link to automatically download, unpack and verify from zenodo.org''' -- -- logging.info("Command: Download database. Checking internal path information.") -- -- diamond_location = DefaultValues.DB_LOCATION_DEFINITION -- ++ logging.info("Set path to database location by: $ export CHECKM2DB=path/to/database/CheckM2_database/uniref100.KO.1.dmnd") + + def download_database(self, download_location): + +@@ -87,32 +59,11 @@ + + diamond_location = DefaultValues.DB_LOCATION_DEFINITION + - try: - with open(diamond_location) as f: - diamond_definition = json.load(f) @@ -112,298 +100,27 @@ The patch needs to change whole file - there was a problem with dos style ending - logging.warning('DIAMOND database found at {}. Overwriting previous database.'.format(diamond_definition['DBPATH'])) - - -- make_sure_path_exists(os.path.join(download_location, 'CheckM2_database')) -- -- backpack_downloader = zenodo_backpack.zenodo_backpack_downloader('INFO') -- highest_compatible_version, DOI = versionControl.VersionControl().return_highest_compatible_DB_version() -- + make_sure_path_exists(os.path.join(download_location, 'CheckM2_database')) + + backpack_downloader = zenodo_backpack.zenodo_backpack_downloader('INFO') + highest_compatible_version, DOI = versionControl.VersionControl().return_highest_compatible_DB_version() + - - diamond_loc_final = os.path.join(download_location, 'CheckM2_database', 'uniref100.KO.1.dmnd') - -- if download_location is not None: -- #check we have writing permission -- try: -- os.makedirs(download_location, exist_ok=True) -- with tempfile.TemporaryDirectory(dir=download_location): -- pass -- except OSError: -- logging.error("You do not appear to have permission to write to {}. Please choose a different directory" -- .format(download_location)) -- sys.exit(1) -- -- backpack_downloader.download_and_extract(download_location, DOI, progress_bar=True, no_check_version=False) -- + if download_location is not None: + #check we have writing permission + try: +@@ -126,12 +77,6 @@ + + backpack_downloader.download_and_extract(download_location, DOI, progress_bar=True, no_check_version=False) + - diamond_definition['DBPATH'] = os.path.abspath(diamond_loc_final) - - with open(diamond_location, 'w') as dd: - json.dump(diamond_definition, dd) - - -- else: -- logging.info('Failed to determine download location') -- sys.exit(1) -- -- #do checksum -- if versionControl.VersionControl().checksum_version_validate_DIAMOND(): -- logging.info('Diamond DATABASE downloaded successfully! Consider running to verify everything works.') -- else: -- logging.error('Could not verify successfull installation of reference database.') -- -- -- def update_database(self): -- pass -- --def update_checkm2(): -- pass -- -- --def check_empty_dir(input_dir, overwrite=False): -- """Check the the specified directory is empty and create it if necessary.""" -- if not os.path.exists(input_dir): -- make_sure_path_exists(input_dir) -- else: -- # check if directory is empty -- files = os.listdir(input_dir) -- if len(files) != 0: -- if overwrite: -- for root, dirs, files in os.walk(input_dir): -- for f in files: -- os.unlink(os.path.join(root, f)) -- for d in dirs: -- shutil.rmtree(os.path.join(root, d)) -- else: -- logging.error('Output directory must be empty: ' + input_dir + ' Use --force if you wish to overwrite ' -- 'existing directory. \n') -- sys.exit(1) -- --def verify_prodigal_output(prodigal_dir, ttable_dict, bin_extension): -- """Check the prodigal process was successful, matches internal list of genomes, and return list of protein files.""" -- if not os.path.exists(prodigal_dir): -- # check if directory is empty -- logging.error('Error: Protein directory {} does not exist: ' + prodigal_dir + '\n') -- sys.exit(1) -- -- else: -- files = os.listdir(prodigal_dir) -- #check if files were generated -- if len(files) == 0: -- logging.error('Error: No protein files were generated in {}'.format(prodigal_dir)) -- sys.exit(1) -- -- prodigal_files = [] -- -- for f in files: -- if f.endswith('.faa'): -- protein_file = os.path.join(prodigal_dir, f) -- if os.stat(protein_file).st_size == 0: -- logging.warning("Skipping protein file {} as it was empty.".format(protein_file)) -- del ttable_dict[f[:-4]] -- elif os.path.splitext(os.path.basename(f))[0] not in ttable_dict: -- logging.warning("Skipping protein file {} as it was not generated by Checkm2.".format(protein_file)) -- else: -- prodigal_files.append(protein_file) -- if len(prodigal_files) == len(ttable_dict.keys()): -- return prodigal_files, ttable_dict -- else: -- logging.error('Error: List of protein files does not match internal reference.') -- sys.exit(1) -- --def check_if_file_exists(inputFile): -- """Check if file exists.""" -- if not os.path.exists(inputFile): -- logging.error('File does not exist: ' + inputFile + '\n') -- sys.exit(1) -- -- --def check_if_dir_exists(inputDir): -- """Check if directory exists.""" -- if not os.path.exists(inputDir): -- logging.error('Input directory does not exists: ' + inputDir + '\n') -- sys.exit(1) -- -- --def make_sure_path_exists(path): -- """Create directory if it does not exist.""" -- if not path: -- return -- -- try: -- os.makedirs(path) -- except OSError as exception: -- if exception.errno != errno.EEXIST: -- logging.error('Specified path does not exist: ' + path + '\n') -- sys.exit(1) -+import os -+import errno -+import sys -+import logging -+import shutil -+import requests -+import json -+import gzip -+import tempfile -+ -+from checkm2 import versionControl -+from checkm2.defaultValues import DefaultValues -+from checkm2 import zenodo_backpack -+ -+class DiamondDB: -+ def __init__(self): -+ if DefaultValues.DB_VAR in os.environ: -+ self.DATABASE_DIR = os.environ[DefaultValues.DB_VAR] -+ #Check if it's still there and if not, unset variable -+ if not os.path.exists(self.DATABASE_DIR): -+ logging.warning('Database not found using the environmental variable: {}. Please fix your $PATH. Using internal database path instead.'.format(DefaultValues.DB_VAR)) -+ -+ diamond_definition = self.__get_db_file() -+ -+ if diamond_definition == 'Not Set': -+ self.DATABASE_DIR = 'Not Set' -+ else: -+ self.DATABASE_DIR = diamond_definition -+ else: -+ diamond_definition = self.__get_db_file() -+ -+ if diamond_definition == 'Not Set': -+ self.DATABASE_DIR = 'Not Set' -+ else: -+ self.DATABASE_DIR = diamond_definition -+ -+ -+ def __get_db_file(self): -+ return DefaultValues.DB_LOCATION_DEFINITION -+ -+ def get_DB_location(self): -+ if self.DATABASE_DIR == 'Not Set': -+ logging.error( -+ 'DIAMOND database not found. Please download database using $ checkm2 database --download --path /path/to/database ' -+ + ',but FIRST set CHECKM2DB to PATH by $ export CHECKM2DB=\"/path/to/database/CheckM2_database/uniref100.KO.1.dmnd\"' -+ ) -+ sys.exit(1) -+ else: -+ return self.DATABASE_DIR -+ -+ def set_DB_location(self, provided_location): -+ logging.info("Set path to database location by: $ export CHECKM2DB=path/to/database/CheckM2_database/uniref100.KO.1.dmnd") -+ -+ def download_database(self, download_location): -+ -+ '''Uses a DOI link to automatically download, unpack and verify from zenodo.org''' -+ -+ logging.info("Command: Download database. Checking internal path information.") -+ -+ diamond_location = DefaultValues.DB_LOCATION_DEFINITION -+ -+ make_sure_path_exists(os.path.join(download_location, 'CheckM2_database')) -+ -+ backpack_downloader = zenodo_backpack.zenodo_backpack_downloader('INFO') -+ highest_compatible_version, DOI = versionControl.VersionControl().return_highest_compatible_DB_version() -+ -+ if download_location is not None: -+ #check we have writing permission -+ try: -+ os.makedirs(download_location, exist_ok=True) -+ with tempfile.TemporaryDirectory(dir=download_location): -+ pass -+ except OSError: -+ logging.error("You do not appear to have permission to write to {}. Please choose a different directory" -+ .format(download_location)) -+ sys.exit(1) -+ -+ backpack_downloader.download_and_extract(download_location, DOI, progress_bar=True, no_check_version=False) -+ -+ else: -+ logging.info('Failed to determine download location') -+ sys.exit(1) -+ -+ #do checksum -+ if versionControl.VersionControl().checksum_version_validate_DIAMOND(): -+ logging.info('Diamond DATABASE downloaded successfully! Consider running to verify everything works.') -+ else: -+ logging.error('Could not verify successfull installation of reference database.') -+ -+ -+ def update_database(self): -+ pass -+ -+def update_checkm2(): -+ pass -+ -+ -+def check_empty_dir(input_dir, overwrite=False): -+ """Check the the specified directory is empty and create it if necessary.""" -+ if not os.path.exists(input_dir): -+ make_sure_path_exists(input_dir) -+ else: -+ # check if directory is empty -+ files = os.listdir(input_dir) -+ if len(files) != 0: -+ if overwrite: -+ for root, dirs, files in os.walk(input_dir): -+ for f in files: -+ os.unlink(os.path.join(root, f)) -+ for d in dirs: -+ shutil.rmtree(os.path.join(root, d)) -+ else: -+ logging.error('Output directory must be empty: ' + input_dir + ' Use --force if you wish to overwrite ' -+ 'existing directory. \n') -+ sys.exit(1) -+ -+def verify_prodigal_output(prodigal_dir, ttable_dict, bin_extension): -+ """Check the prodigal process was successful, matches internal list of genomes, and return list of protein files.""" -+ if not os.path.exists(prodigal_dir): -+ # check if directory is empty -+ logging.error('Error: Protein directory {} does not exist: ' + prodigal_dir + '\n') -+ sys.exit(1) -+ -+ else: -+ files = os.listdir(prodigal_dir) -+ #check if files were generated -+ if len(files) == 0: -+ logging.error('Error: No protein files were generated in {}'.format(prodigal_dir)) -+ sys.exit(1) -+ -+ prodigal_files = [] -+ -+ for f in files: -+ if f.endswith('.faa'): -+ protein_file = os.path.join(prodigal_dir, f) -+ if os.stat(protein_file).st_size == 0: -+ logging.warning("Skipping protein file {} as it was empty.".format(protein_file)) -+ del ttable_dict[f[:-4]] -+ elif os.path.splitext(os.path.basename(f))[0] not in ttable_dict: -+ logging.warning("Skipping protein file {} as it was not generated by Checkm2.".format(protein_file)) -+ else: -+ prodigal_files.append(protein_file) -+ if len(prodigal_files) == len(ttable_dict.keys()): -+ return prodigal_files, ttable_dict -+ else: -+ logging.error('Error: List of protein files does not match internal reference.') -+ sys.exit(1) -+ -+def check_if_file_exists(inputFile): -+ """Check if file exists.""" -+ if not os.path.exists(inputFile): -+ logging.error('File does not exist: ' + inputFile + '\n') -+ sys.exit(1) -+ -+ -+def check_if_dir_exists(inputDir): -+ """Check if directory exists.""" -+ if not os.path.exists(inputDir): -+ logging.error('Input directory does not exists: ' + inputDir + '\n') -+ sys.exit(1) -+ -+ -+def make_sure_path_exists(path): -+ """Create directory if it does not exist.""" -+ if not path: -+ return -+ -+ try: -+ os.makedirs(path) -+ except OSError as exception: -+ if exception.errno != errno.EEXIST: -+ logging.error('Specified path does not exist: ' + path + '\n') -+ sys.exit(1) -\ No newline at end of file + else: + logging.info('Failed to determine download location') + sys.exit(1)