From 55536fbb30024b7037049913a36577d85e9d3cf2 Mon Sep 17 00:00:00 2001 From: donkirkby Date: Fri, 23 Jul 2021 17:16:32 -0700 Subject: [PATCH 01/47] Add HIVIntact analysis, for #10. --- Singularity | 19 +++++++++++++++ gene_splicer/primer_finder.py | 37 +++++++++++++++++++++++++---- gene_splicer/primer_finder_class.py | 2 +- gene_splicer/sample.py | 18 +++++++++++--- gene_splicer/study_summary.py | 13 +++++++--- 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/Singularity b/Singularity index d938b8e..5f18945 100644 --- a/Singularity +++ b/Singularity @@ -28,6 +28,9 @@ From: ubuntu:22.04 fontconfig libbz2-dev liblzma-dev libssl-dev \ libffi-dev libsqlite3-dev + echo ===== Installing MAFFT ===== >/dev/null + apt-get install -y mafft + echo ===== Installing Python ===== >/dev/null apt-get install -y python3 python3-pip @@ -35,6 +38,8 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null + pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.2 + cd /opt/primer_finder pip3 install . @@ -55,3 +60,17 @@ From: ubuntu:22.04 %runscript gene_splicer_sample --hivseqinr /opt/hivseqinr "$@" + +%apprun hivintact + gene_splicer_sample --hivintact "$@" + +%apphelp hivintact + Search proviral consensus sequences for primers, then use HIVIntact to + decide if the genomes are complete. + +%applabels hivintact + KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv + KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \ + table_precursor_csv hivseqinr_results_tar + KIVE_THREADS 1 + KIVE_MEMORY 200 diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index dbb741e..62dbdcc 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -1,4 +1,5 @@ import re +import subprocess from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType from csv import DictReader, DictWriter from itertools import groupby @@ -77,6 +78,9 @@ def parse_args(): help="Path to HIVSeqinR source code, or download " "destination. HIVSeqinR will be skipped if this " "isn't given.") + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis.") parser.add_argument( '--nodups', action='store_false', @@ -386,8 +390,8 @@ def add_primers(row): def remove_primers(row): # Strip the primers out, convert index values from floats. newseq = row.sequence[ - int(row.fwd_sample_primer_size + row.fwd_sample_primer_start) - :-int(row.rev_sample_primer_size + row.rev_sample_primer_start)] + int(row.fwd_sample_primer_size + row.fwd_sample_primer_start): + -int(row.rev_sample_primer_size + row.rev_sample_primer_start)] row.sequence = newseq return row @@ -422,6 +426,13 @@ def archive_hivseqinr_results(working_path: Path, archive.add(result_path, result_path.name) +def archive_hivintact_results(working_path: Path, + hivintact_results_tar: typing.IO): + archive = TarFile(fileobj=hivintact_results_tar, mode='w') + for result_path in working_path.iterdir(): + archive.add(result_path, result_path.name) + + def run(contigs_csv, conseqs_csv, cascade_csv, @@ -433,7 +444,9 @@ def run(contigs_csv, sample_size=50, force_all_proviral=False, default_sample_name: str = None, - hivseqinr_results_tar: typing.IO = None): + hivseqinr_results_tar: typing.IO = None, + run_hivintact: bool = False, + hivintact_results_tar: typing.IO = None): all_samples = utils.get_samples_from_cascade(cascade_csv, default_sample_name) @@ -517,6 +530,21 @@ def run(contigs_csv, if hivseqinr_results_tar is not None: archive_hivseqinr_results(working_path, hivseqinr_results_tar) + if run_hivintact: + working_path: Path = outpath / f'hivintact_{i}' + working_path.mkdir(exist_ok=True) + with (working_path / 'hiv-intact.log').open('w') as log_file: + subprocess.run(['proviral', + 'intact', + '--subtype=B', + str(no_primers_fasta)], + check=True, + stdout=log_file, + stderr=subprocess.STDOUT, + cwd=working_path) + if hivintact_results_tar is not None: + archive_hivintact_results(working_path, + hivintact_results_tar) files.append(no_primers_fasta) return files @@ -531,7 +559,8 @@ def main(): hivseqinr=args.hivseqinr, nodups=args.nodups, split=args.split, - sample_size=args.sample_size) + sample_size=args.sample_size, + run_hivintact=args.hivintact) return {'fasta_files': fasta_files, 'args': args} diff --git a/gene_splicer/primer_finder_class.py b/gene_splicer/primer_finder_class.py index d5f8cd5..a4f5cf9 100644 --- a/gene_splicer/primer_finder_class.py +++ b/gene_splicer/primer_finder_class.py @@ -144,7 +144,7 @@ def get_slices(self): hxb2_slice = utils.hxb2[self.hxb2_start - self.validation_size:self.hxb2_end] if len(sample_slice) == 0: - logger.debug(\ + logger.debug( 'Sample slice size is 0! \n' f'start: {self.start} \n' f'end: {self.end} \n' diff --git a/gene_splicer/sample.py b/gene_splicer/sample.py index 5e49fb9..858782b 100644 --- a/gene_splicer/sample.py +++ b/gene_splicer/sample.py @@ -44,7 +44,8 @@ def parse_args(): help='Data for proviral landscape plot', type=FileType('w')) parser.add_argument('hivseqinr_results_tar', - help="Archive file with HIVSeqinR's final results folder.", + help="Archive file with HIVSeqinR's final results " + "folder, or HIVIntact's results.", type=FileType('wb')) parser.add_argument( '-p', @@ -57,6 +58,9 @@ def parse_args(): help="Path to HIVSeqinR source code, or download " "destination. HIVSeqinR will be skipped if this " "isn't given.") + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis.") parser.add_argument( '--nodups', action='store_false', @@ -88,10 +92,16 @@ def main(): info_reader = DictReader(args.sample_info_csv) sample_info: dict = next(info_reader) run_name = sample_info.get('run_name', 'kive_run') + if args.hivintact: + hivseqinr_results_tar = None + hivintact_results_tar = args.hivseqinr_results_tar + else: + hivseqinr_results_tar = args.hivseqinr_results_tar + hivintact_results_tar = None fasta_files = primer_finder.run(contigs_csv=args.contigs_csv, conseqs_csv=args.conseqs_csv, cascade_csv=args.cascade_csv, - hivseqinr_results_tar=args.hivseqinr_results_tar, + hivseqinr_results_tar=hivseqinr_results_tar, name=run_name, outpath=outpath, hivseqinr=args.hivseqinr, @@ -99,7 +109,9 @@ def main(): split=args.split, sample_size=args.sample_size, force_all_proviral=True, - default_sample_name=sample_info['sample']) + default_sample_name=sample_info['sample'], + run_hivintact=args.hivintact, + hivintact_results_tar=hivintact_results_tar) for file in fasta_files: gene_splicer.run(file, outdir=outpath) utils.generate_table_precursor(name=run_name, outpath=outpath) diff --git a/gene_splicer/study_summary.py b/gene_splicer/study_summary.py index 74a5652..45efe1f 100644 --- a/gene_splicer/study_summary.py +++ b/gene_splicer/study_summary.py @@ -43,6 +43,10 @@ def parse_args(): 'the runs in samples_csv. Any samples not found ' 'in samples.csv will guess the participant id ' 'from the first part of the sample name.') + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis instead of " + "HIVSeqinR.") return parser.parse_args() @@ -197,7 +201,7 @@ def write_warnings(self, report_file: typing.TextIO, limit: int = None): file=report_file) -def run_gene_splicer(run_path: Path, outcome_folder: Path): +def run_gene_splicer(run_path: Path, outcome_folder: Path, run_hivintact: bool): version_results_path = run_path / 'Results' / 'version_7.14' assert version_results_path.exists(), version_results_path denovo_path = version_results_path / 'denovo' @@ -214,11 +218,14 @@ def run_gene_splicer(run_path: Path, outcome_folder: Path): pipeline_args = [python_path, '-m', 'gene_splicer.pipeline', '--outpath', str(outcome_folder), - '--hivseqinr', str(hivseqinr_path), contigs_path, conseq_path, cascade_path, short_run_name] + if run_hivintact: + pipeline_args.append('--hivintact') + else: + pipeline_args.append(f'--hivseqinr={hivseqinr_path}') try: with log_path.open('w') as log_file: run(pipeline_args, @@ -259,7 +266,7 @@ def main(): print('Missing denovo results:', run_path) continue else: - run_gene_splicer(run_path, outcome_path.parent) + run_gene_splicer(run_path, outcome_path.parent, args.hivintact) assert outcome_path.exists(), outcome_path print('.', end='', flush=True) dots_printed = True From 1d549872a50e211d86d098f1cce747c61f4e603d Mon Sep 17 00:00:00 2001 From: donkirkby Date: Mon, 26 Jul 2021 11:45:00 -0700 Subject: [PATCH 02/47] Install mafft and blast in GitHub Actions, for #10. --- .github/workflows/python-app.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index a4e8adf..8a9e0d6 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -24,6 +24,7 @@ jobs: echo /opt/minimap2-2.17_x64-linux >> $GITHUB_PATH - name: Install dependencies run: | + sudo apt install -qq mafft ncbi-blast+ python -m pip install --upgrade pip pipenv pipenv install --dev - name: Test with pytest From 6b77caceeac9446d6ea464528bf06a8ae92049f8 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 19 May 2023 15:48:19 -0700 Subject: [PATCH 03/47] Enable more checks in HIVIntact --- gene_splicer/primer_finder.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index 62dbdcc..8027440 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -537,6 +537,12 @@ def run(contigs_csv, subprocess.run(['proviral', 'intact', '--subtype=B', + '--run-hypermut', + '--check-long-deletion', + '--check-nonhiv', + '--check-scramble', + '--check-internal-inversion', + '--include-small-orfs', str(no_primers_fasta)], check=True, stdout=log_file, From 28ae510236da3918130090da3ea21046cacf6db2 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 14 Jul 2023 13:54:52 -0700 Subject: [PATCH 04/47] Specify writable workdir for hivintact --- gene_splicer/primer_finder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index 8027440..6a631cb 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -536,6 +536,7 @@ def run(contigs_csv, with (working_path / 'hiv-intact.log').open('w') as log_file: subprocess.run(['proviral', 'intact', + '--working-folder', working_path, '--subtype=B', '--run-hypermut', '--check-long-deletion', From 9aadb156b992bf58e1b4e3b5a5af916e4f264f4f Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 5 Jun 2023 11:32:27 -0700 Subject: [PATCH 05/47] Update dependencies in the README file --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fa27f8d..718ae86 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ 1. minimap2 (https://github.com/lh3/minimap2) (must be available via commandline) 2. blast tools (ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/) 3. R and RSCRIPT (https://www.r-project.org/) +4. mafft (https://mafft.cbrc.jp/alignment/software/) ### Singularity builds * Build all singularity images inside of the `simages` folder From 7cfd902c61efde1d4f4704fae23bac21768fb562 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 5 Jun 2023 12:58:56 -0700 Subject: [PATCH 06/47] Add HIVIntact to our python dependencies --- Pipfile | 1 + Pipfile.lock | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++- setup.py | 1 + 3 files changed, 187 insertions(+), 3 deletions(-) diff --git a/Pipfile b/Pipfile index a039031..a9d403d 100644 --- a/Pipfile +++ b/Pipfile @@ -10,6 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" +intactness-pipeline = {ref = "cfe-1.2", git = "https://github.com/cfe-lab/HIVIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 5bc34d3..aed6c1c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "73432848a2ff11546bd4d06f2cabd2c46876a987e2d0a9e8c0d379cdfd9eb4f5" + "sha256": "100f2cea5bdbda87105a66b042daed34e24cc3e092e8382794b6f03c5553b02d" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,43 @@ ] }, "default": { + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "biopython": { + "hashes": [ + "sha256:11d673698b3d0d6589292ea951fb62cb24ea27d273eca0d08dbbd956690f97f5", + "sha256:2cf38112b6d8415ad39d6a611988cd11fb5f33eb09346666a87263beba9614e0", + "sha256:2f9cfaf16d55ab80d514e7aebe5710dabe4e4ff47ede851031202e33b3249da3", + "sha256:35506e39822c52d11cf09a3951e82375ca1bb9303960b4286acf02c9a6f6c4cc", + "sha256:3b36ba1bf6395c09a365c53530c9d71f3617763fa2c1d452b3d8948368c0f1de", + "sha256:655df416936662c0c8a06a549cb25e1560e1fea5067d850f34fb714b8a3fae6c", + "sha256:65b93b513ce9dd7b2ce058720eadf42cd03f312db3409356efeb93123d1320aa", + "sha256:6ebfbce0d91796c7aef422ee9dffe8827e07e5abaa94545e006f1f20e965c80b", + "sha256:762c6c43a8486b5fcd07f136a3217b87d24755618b9ea9da1f17124ff44c2ad6", + "sha256:793c42a376cd63f62f8a088ce39b7dc6b5c55e4e9031d887c434de1595bfa4b8", + "sha256:7a168709694e10b338718c18d967edd5b56c237dc88642c22275796007a70000", + "sha256:7c5c07123ff5f44c9e6b5369df854a38afd3c0c50ef58498a0ae8f7eb799f3e8", + "sha256:811796f8d222aa3869a50e31e54ce62b69106b47cd8bb06934867c0d843297b5", + "sha256:8bb0c690c7368f255ed45236bf0f5464b476b8c083c8f634533921af78278261", + "sha256:919a2c583cabf9c96d2ae4e1245a6b0376932fb342aca302a0fc198b71ab3275", + "sha256:97cbdbed01b2512471f36c74b91658d1dfbdcbf39bc038f6ce5a41c3e60a8fc6", + "sha256:9ba33244f0eff830beaa7240065bdb5095d96fded6599b76bbb9ddab45cd2bbd", + "sha256:a51d9c1d1b4b634447535da74a644fae59bc234fbbf9001e2dc6b6fbabb98019", + "sha256:b09efcb4733c8770f25eab5fe555a96a08f5ab9e1bc36939e08ebf2ffbf3e0f1", + "sha256:b37c0d24191e5c96ca02415a5188551980c83a0d518bbc4ffe3c9a5d1fe0ee81", + "sha256:ccd729249fd5f586dd4c2a3507c2ea2456825d7e615e97c07c409c850eaf4594", + "sha256:e41b55edcfd448630e77bf4de66a7235324a8a149621499891da6bd1d5085b9a", + "sha256:ee51bb1cd7decffd24da6b76d5e01b7e2fd818ab85cf0c180226cbb5793a3abd", + "sha256:ef7c79b65b0b3f3c7dc59e20a7f8ae5758d8e852cb8b9cace590dc5617e348ba" + ], + "markers": "python_version >= '3.7'", + "version": "==1.81" + }, "certifi": { "hashes": [ "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", @@ -105,6 +142,14 @@ "markers": "python_full_version >= '3.7.0'", "version": "==3.2.0" }, + "click": { + "hashes": [ + "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd", + "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.6" + }, "gotoh": { "git": "https://github.com/cfe-lab/MiCall.git", "ref": "f1687e7b7c7f1f3a6f3cb63107f1cf9b2b210f26", @@ -118,6 +163,118 @@ "markers": "python_version >= '3.5'", "version": "==3.4" }, + "intactness-pipeline": { + "git": "https://github.com/cfe-lab/HIVIntact", + "ref": "14f571d873131d2521a6bddcefbf795914ac78c7" + }, + "jarowinkler": { + "hashes": [ + "sha256:07df473a812772794181885fc8e9950b629809297c8a1c00e06d0376cb6f5611", + "sha256:0dd61e79babfbca37f6f4d2b81bfbc92979e5e22f02d04ba5e762d84901a95bf", + "sha256:1097b349e09c6ae2d92520ef0ab79580b6b136f6f1c1d62ad783595011f0f260", + "sha256:18da76d3a6d7a0898f36525a1ce8303fcb5413d1bcbc30c3f3634344aeecf397", + "sha256:18e3af57ac066a617bd688d62b9d0da11da32dca977d9fe5c1726040be26ad2e", + "sha256:1b243a43ef1740bdec3101243347ceb59f698f28df0c514935f4cf856af22795", + "sha256:21869871774ea4a34222538c33704234ee8e1b4c1a82fe95471215994575e631", + "sha256:21ae65449c52b14578fd28f51c2efdd976a632979054cf12e714cc86fdc1d1aa", + "sha256:28654c3cfd1f917900a44650cee3a6827210c1f1783ef5aca3399ee31ee2cd17", + "sha256:288c615134ec2d5d122fb834eb0e134f5ccd0080ce1091e2f8170d861de4c24d", + "sha256:298c708bd8609b0563846cd770891f4fc6492ea1c09ef7ac24a68731f4ede37a", + "sha256:2daa79de5856d34ee6a813d9b049d55aad7014a92ce1d90fb3e487338ec362ef", + "sha256:3af4e4aab7c6ba14f75bd74a21ee00befed67cd2221e626c5741545b4a57c60c", + "sha256:439d66dd82a452535293c2503a0930c2aacc4ebf9542f0ca52b351084e9f3e32", + "sha256:447c9b1323e7b16ff21da9121164b54c4a806f352f716b2a6e1f937acabc6e73", + "sha256:46e042d75ee91e1fe678ad0bdb6eea4d6d052f6e6ee35adac8bf5d01942e1f6a", + "sha256:49796215bd66bb87d2d88da7131b785330b3b2e50cbd7a7be75b4964512f5aa9", + "sha256:4a2297958acccf63da521f1e7d1c17e3f074db6bf6d4d9eb8c888e638fff2feb", + "sha256:4ea218d666041f41434957816e0a52e8533e7e191c8302ca062ebfa4ec42220d", + "sha256:4f91e8ee2b81c44d8f4aec164e84a976fcabe754fe107efae3eae2e9fb433ffd", + "sha256:4feec944743bdcd099b8967d16802c78f1009f3222a241b3d7424795ad301c54", + "sha256:4ffe4a84dba6a7cc9411f5185677a7fa86087d3a036281f837eec7a1afd93a34", + "sha256:50bac973e0aec697d73bf6b601e027e6079779fb9f6b0905eaefb055536bec39", + "sha256:51eeb42de858363e93c3407568b3fec1919b99a5ffb6d5c4e3dc494a12d37241", + "sha256:532c89ab12246f36338500b7c7c36b87389e01fca93eec74680423e5e5678677", + "sha256:53b9f9ca5cd56c82500171abc4818ef9f756e77e995ac57046f598fba2642f78", + "sha256:5586164f7063fd9d1704ba136041f5811d847e994dabb973ce4741f8d512a586", + "sha256:5719f55bbc84ff08e8ef8d6a87ee936dfa2d29554ae2fc2888214a336c660cbd", + "sha256:5aa5645ed7b77ebfa18f9cf7276dfe532d00d64c551fdbdf086c1583a40a5079", + "sha256:5b8a0e1476e7a0cf316e32acd02733f6dba38a19e57c8aa58dad8cbb69627b54", + "sha256:5e83b734568aeabf71a89b8f9a7b9630eca71de68e74701d306d56f9e8621c3f", + "sha256:5e954870ba9e8ad3ffdda976a71379b9cc8474195caa3009d89dda350cf5d0fe", + "sha256:66aa6d4e961d956da7508d9bf837686e2b957db14a19dbfb0aefe259f9c6a177", + "sha256:68a77b3f262fa90ec30563a50835c760f7417a2cf55138a77606f2def1a4d8b3", + "sha256:6e49f8c2258bdab01fef9dd8111811de8ec000a7b6f5a12283f2322ce5f473e3", + "sha256:6ea3f8e772debaf85ecf9b0aa07f9fd8de3bfaf52595edaa86c979309658afdc", + "sha256:71a41f8d34bb315ea245a9c78e1dc40e58e560ba699ae34932f397271eaa830e", + "sha256:7253c25288294474d98e269dd73d7e8d9f503655c77180201788c6f29848bb4e", + "sha256:786db72036f9b43aa6e4848584580ff8d0a33816f67050cc1d17f283a9446002", + "sha256:7986c585540262e2abe3badda0e4982291f6513bd3cd313447b0faf77fae454f", + "sha256:7dcfe9a47ec5e1c544add253660475fe44b771b0cc1b5d959ca9bdad8f778e65", + "sha256:7f964945c52bb21058718f1e074a14d231bff1dad83c8e8bd1607ed6add4b0fa", + "sha256:813f626b8f2703275e7ad18b842cedc1e6d06e4a334337f96b5a91afcda78ed2", + "sha256:8493cb25ae8627272537f40b6fdfb376824e38d1e8e7e48196e49494bbdc78f3", + "sha256:8b77e53d9a1a8aa84f6c3817790d0fa336a42f726277d9e5a0cf2420337349ee", + "sha256:8e85bb480eee04681d7f99ce95e86ec8d9182204737a3d141f5a97216d164d6c", + "sha256:8ed3635427c04c8680807ecf6b71014c145ae760c22243f8ff6dd1a8cc7fa695", + "sha256:91ccb6b51cff6158a7f699e0912ab243b7f0026d63919a7696214303e709a21e", + "sha256:93cb99fc11b44db61631eea23294f6ae66e944d27129b2856e52f66f11eb8082", + "sha256:967a10aed9fca73b826ab41d859ac6a35021ac39efeea5991070964db10a9b13", + "sha256:97175ef8bf47e796280c899c8d72788313e277a30cd5c4a549bbab60ce70e5f5", + "sha256:9b2c89b9893c2c0fb1c7369160e2a08258415df5345019dd61c3e15c2ca74b65", + "sha256:9bd54d4635bc9d01510fff1545b4ec1e26bddafde0aff6af1af4e46b80407e9e", + "sha256:9f968112e0b8be55b259e041be1f9f294931c8790f014c5c04f7c1ffe7928b78", + "sha256:a03972d2878e6954852ffce67a843de8a30c515eaa257313b609151e16036bab", + "sha256:a3f03c8178b94380e103c9368e84b88bfca437e59e484dc71d8b059d43c6e8dc", + "sha256:a71063e01863f561d86459929ad7c5f6c389922aada4170b67ab7c266e6cf96c", + "sha256:aa12fa8a788804fca8fde0f24c14015f3adf18b2336adb66526e326c15b59c72", + "sha256:ae3b951ff925a5c1fc7746845d796ce34891313813f6c3bc2d057759c8090c47", + "sha256:aea994d5673e9c3b49d548b58f961448bd8a2ba40d3244c1809c891ed29daa02", + "sha256:af28ea284cfbd1b21b29ff94b759f20e94e4f7c06f424b0b4702e701c2a21668", + "sha256:b24b58bd62de20cc773b0b55352d0a43d6cf2beb9b0a21bbaf5ca1f6f50d3d44", + "sha256:b29a7adb25bf02f1e007fec412a67a5c3c8de1ba062454de539e623eb638fcaf", + "sha256:b39b7cdb985f6c040830f047cd98a0563bbfa909944130223c23667432b39c73", + "sha256:b56248ab6e734b40309b6337b0de5cb37e7f0e71d64c7f5f0d58bcb46c05699d", + "sha256:b59b4e82ade4b9b147646189b500f2085e06c8c7746dd6311e03bc4d4ad126e8", + "sha256:b696f0f80df13e8e86958a9d0eaa9218a6a311b5c566f6a081ef17d7d594713b", + "sha256:b73bba435e9cd7618130907d753c708c84baddec5ee6e2637f9630f02496b189", + "sha256:b91d6fe7fd12c5d3bb82b644500df13ee0f7ae949f067e6d967be896aa340732", + "sha256:b959d3fcf4ffe865ee518328d77d137ea7b6ad0c8f1f8b96b7a08cd97d3a9c87", + "sha256:bbb94c0d894cde960b264f3f797c99cbe316e0280ea1b81e240d6ee4ec19fa0c", + "sha256:bc0dcc31ce493aa70067e1f7ed2cb1528b8bd86bb276f25b6c09fabf746b3df7", + "sha256:bc8182ba6561a19eddbbd88106b986b93ae11205919cea36385a260d2146c638", + "sha256:c0eac3a71193575002e2c374ff7be5ef4005e9370c29dad83e2537f57d09e07e", + "sha256:c1f3e5d5137419a608a878b76ec277c1618119259134ef94e323d5e7fdc2acfd", + "sha256:c834dd86fc4f372c0cd6ec7a33432e49e644de7b5d37f520b96500cab7e9d992", + "sha256:cba8a789610e97d29c850370c6c8f68c0481355446a356bfa0b2703d8afb8436", + "sha256:d2cf546b18f9d49d25f33dd564c06fbe29c0e3090d062bad84ba04e77fc7d168", + "sha256:d2debc08e15e6c16999c27c1afc4c2493c0d3f140206d24970872f4619ea840a", + "sha256:d4638b6b4569e418365aa12d8175025b93336bb074288ec8b9b259734da9990e", + "sha256:d5399e513b58496483eeda61ff180676fc6ff9c3b6ef53af3c53be0777e71247", + "sha256:d6648d9c68bcc79f80092fa00e9f897df12b9826f05b7211260b494742ae3e12", + "sha256:d8da4660934bbf3958e6bd0165ab088d6e65ef6cee0c52d82e86d424ca1be96f", + "sha256:da09cbcbb917d99fb341730dbb7892b7a642ef0ca371c7f3a647b4dae6770190", + "sha256:db49e56139da097b5d85f323b1ed906a5d9d6d3a4336ce694910852d0a4cd607", + "sha256:dd1cd8a99f7f3347d3b30941460531a0ee8b855f199a3b56ac6d49aa98266600", + "sha256:e2d2c6341b021b146db418c77ee71d4318013074761681aba42c1d332a723f7d", + "sha256:e36a5af0db07010e3cc70000edebc2cdb92c39beb2d10d721604a7a52c48100d", + "sha256:e3edd98b7fa078b06b1bd0e12d7e244c875e7030ad242eb31719f2f87e343862", + "sha256:e40905895ddbce8cbdc5f079299371630e771db3c0e7820b2d262c4bb6a8bea0", + "sha256:e480e39ed2420a881ac445f6fea8064c36f535970deb4ee94677afe06985b917", + "sha256:e68cb387d79871b45d20a670bdd33b0f9edb08ed85aa7a5eb19dafdecfa1c091", + "sha256:e8c6dba59166803347c96f48e1af608f8bbc8efe9d545e1a3f9bfb526e76fd62", + "sha256:eaa520f9b4e6e955269a117d7481332b06aff3fb04981fd218294793ba4ae5fc", + "sha256:ef39035486ce07745a0fee9dd80bd9a0b692811111da4ef9aedbc0ddd23ff9cd", + "sha256:f2afe56a6cb3e84cc77af6e4a1e8eb6f4f6211a8dd0468237aeee27e16501752", + "sha256:f8dd58576c81e8115ca29dc757feb413fd689d194789670a533384997306385e", + "sha256:fa1db18ba0a0fe383e9396e2db91d31fcabfc0ff03fa599b5a10edc57416084a", + "sha256:fa2d76d3572229ad282dd7ed0005387e9085bdfd954a7636a6f920530e3b670d", + "sha256:fa55d91bcf097b464df6efa92762434aa3026a9774ab2509895a1948bf64b121", + "sha256:fbcb4aafadf3ac758de12deb3c90c4e4b6497a104d00ecc8cb6585757af3ab90", + "sha256:fd4b9111a2092eaaaabd7dd33aa8703d734075a2f75ec87976eab0a2b60273ac" + ], + "markers": "python_version >= '3.6'", + "version": "==1.2.3" + }, "numpy": { "hashes": [ "sha256:012097b5b0d00a11070e8f2e261128c44157a8689f7dedcf35576e525893f4fe", @@ -185,7 +342,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "python-levenshtein": { @@ -256,6 +413,31 @@ "index": "pypi", "version": "==2.31.0" }, + "scipy": { + "hashes": [ + "sha256:08d957ca82d3535b3b9ba6c8ff355d78fe975271874e2af267cb5add5bd78625", + "sha256:249cfa465c379c9bb2c20123001e151ff5e29b351cbb7f9c91587260602c58d0", + "sha256:366a6a937110d80dca4f63b3f5b00cc89d36f678b2d124a01067b154e692bab1", + "sha256:39154437654260a52871dfde852adf1b93b1d1bc5dc0ffa70068f16ec0be2624", + "sha256:396fae3f8c12ad14c5f3eb40499fd06a6fef8393a6baa352a652ecd51e74e029", + "sha256:3b9963798df1d8a52db41a6fc0e6fa65b1c60e85d73da27ae8bb754de4792481", + "sha256:3e8eb42db36526b130dfbc417609498a6192381abc1975b91e3eb238e0b41c1a", + "sha256:512fdc18c65f76dadaca139348e525646d440220d8d05f6d21965b8d4466bccd", + "sha256:aec8c62fbe52914f9cf28d846cf0401dd80ab80788bbab909434eb336ed07c04", + "sha256:b41a0f322b4eb51b078cb3441e950ad661ede490c3aca66edef66f4b37ab1877", + "sha256:b4bb943010203465ac81efa392e4645265077b4d9e99b66cf3ed33ae12254173", + "sha256:b588311875c58d1acd4ef17c983b9f1ab5391755a47c3d70b6bd503a45bfaf71", + "sha256:ba94eeef3c9caa4cea7b402a35bb02a5714ee1ee77eb98aca1eed4543beb0f4c", + "sha256:be8c962a821957fdde8c4044efdab7a140c13294997a407eaee777acf63cbf0c", + "sha256:cce154372f0ebe88556ed06d7b196e9c2e0c13080ecb58d0f35062dc7cc28b47", + "sha256:d51565560565a0307ed06fa0ec4c6f21ff094947d4844d6068ed04400c72d0c3", + "sha256:e866514bc2d660608447b6ba95c8900d591f2865c07cca0aa4f7ff3c4ca70f30", + "sha256:fb5b492fa035334fd249f0973cc79ecad8b09c604b42a127a677b45a9a3d4289", + "sha256:ffb28e3fa31b9c376d0fb1f74c1f13911c8c154a760312fbee87a21eb21efe31" + ], + "markers": "python_version < '3.13' and python_version >= '3.9'", + "version": "==1.11.1" + }, "setuptools": { "hashes": [ "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f", @@ -269,7 +451,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "tzdata": { diff --git a/setup.py b/setup.py index 069adcc..8fc0cd8 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', + 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.2', 'pyyaml' ], package_data={ From 7f4eba1267a313ee602ae51548006b8eaaea964e Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 9 Jun 2023 09:13:19 -0700 Subject: [PATCH 07/47] Generate table_precursor table from hivintact output --- gene_splicer/utils.py | 78 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 12 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index b01b5bd..c11ab20 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -5,6 +5,7 @@ import typing import yaml +import json import shutil import subprocess as sp import pandas as pd @@ -391,14 +392,54 @@ def align(target_seq, return alignment_path -def generate_table_precursor(name, outpath, add_columns=None): - # Output csv - precursor_path: Path = outpath / 'table_precursor.csv' +HIVINTACT_TRANSLATION_TABLE = { + 'APOBECHypermutationDetected': 'Hypermut', + 'LongDeletion': 'LargeDeletion', + 'PackagingSignalDeletion': '5DEFECT', + 'WrongORFNumber': 'PrematureStop_OR_AAtooLong_OR_AAtooShort', + } - # Load filtered sequences - filtered_path = outpath / (name + '_filtered.csv') - filtered = pd.read_csv(filtered_path) - # Load hivseqinr data +def translate_hivintact_error(error): + return HIVINTACT_TRANSLATION_TABLE.get(error, error) + +HIVINTACT_ERRORS_TABLE = [ + 'NonHIV', + 'LongDeletion', + 'Scramble', + 'InternalInversion', + 'APOBECHypermutationDetected', + 'MisplacedORF', + 'WrongORFNumber', + 'DeletionInOrf', + 'FrameshiftInOrf', + 'MajorSpliceDonorSiteMutated', + 'PackagingSignalDeletion', + 'PackagingSignalNotComplete', + 'RevResponseElementDeletion', + ] + +def iterate_hivintact_data(name, outpath): + for d in glob.glob(str(outpath / 'hivintact*')): + for (SEQID, sequence) in read_fasta(os.path.join(d, 'intact.fasta')): + row = [SEQID, 'Intact'] + yield row + + with open(os.path.join(d, 'errors.json'), 'r') as f: + js = json.load(f) + for SEQID in js: + all_errors = [obj.get('error') for obj in js[SEQID] if 'error' in obj] + if all_errors: + ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) + verdict = translate_hivintact_error(ordered[0]) + row = [SEQID, verdict] + yield row + +def get_hivintact_data(name, outpath): + column_names = ['SEQID', 'MyVerdict'] + data = iterate_hivintact_data(name, outpath) + return pd.DataFrame(data, columns=column_names) + +def get_hivseqinr_data(name, outpath): seqinr_paths = glob.glob( str(outpath / 'hivseqinr*' / 'Results_Final' / 'Output_MyBigSummary_DF_FINAL.csv')) @@ -409,13 +450,26 @@ def generate_table_precursor(name, outpath, add_columns=None): part = pd.read_csv(path) parts.append(part) # seqinr = pd.read_csv(seqinr_path) + return pd.concat(parts) + +def generate_table_precursor(name, outpath, add_columns=None): + # Output csv + precursor_path: Path = outpath / 'table_precursor.csv' + + # Load filtered sequences + filtered_path = outpath / (name + '_filtered.csv') + filtered = pd.read_csv(filtered_path) + # Load hivseqinr data or HIVIntact results + results = get_hivintact_data(name, outpath) + if results.empty: + results = get_hivseqinr_data(name, outpath) + try: - seqinr = pd.concat(parts) # Assign new columns based on split - seqinr[['name', 'sample', 'reference', - 'seqtype']] = seqinr['SEQID'].str.split('::', expand=True) + results[['name', 'sample', 'reference', + 'seqtype']] = results['SEQID'].str.split('::', expand=True) # Merge - merged = seqinr.merge(filtered, on='sample') + merged = results.merge(filtered, on='sample') except ValueError: with precursor_path.open('w') as output_file: writer = DictWriter(output_file, @@ -448,7 +502,7 @@ def generate_table_precursor(name, outpath, add_columns=None): if add_columns: for key, val in add_columns.items(): merged[key] = val - if parts: + if not results.empty: merged[['sample', 'sequence', 'MyVerdict'] + genes_of_interest].to_csv( precursor_path, index=False) else: From 891b4a88a980baab635196020eda6ad838be139f Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 9 Jun 2023 10:17:38 -0700 Subject: [PATCH 08/47] Remember intact sequences --- gene_splicer/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index c11ab20..d92a4e0 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -419,14 +419,17 @@ def translate_hivintact_error(error): ] def iterate_hivintact_data(name, outpath): + intact = {} for d in glob.glob(str(outpath / 'hivintact*')): for (SEQID, sequence) in read_fasta(os.path.join(d, 'intact.fasta')): row = [SEQID, 'Intact'] + intact[SEQID] = True yield row with open(os.path.join(d, 'errors.json'), 'r') as f: js = json.load(f) for SEQID in js: + if SEQID in intact: continue all_errors = [obj.get('error') for obj in js[SEQID] if 'error' in obj] if all_errors: ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) From b88c4e40f501021f92169b67d7bc471ff14bd67a Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 23 Jun 2023 17:32:42 -0700 Subject: [PATCH 09/47] Update hivintact errors list --- gene_splicer/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index d92a4e0..f576034 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -411,6 +411,8 @@ def translate_hivintact_error(error): 'MisplacedORF', 'WrongORFNumber', 'DeletionInOrf', + 'InsertionInOrf', + 'InternalStopInOrf', 'FrameshiftInOrf', 'MajorSpliceDonorSiteMutated', 'PackagingSignalDeletion', From 0bca5bebf7f7c1f2be0b051bb0736280f378eebd Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 30 Jun 2023 12:11:30 -0700 Subject: [PATCH 10/47] Improve hivintact errors priority and translation --- gene_splicer/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index f576034..b5f65b6 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -396,6 +396,8 @@ def align(target_seq, 'APOBECHypermutationDetected': 'Hypermut', 'LongDeletion': 'LargeDeletion', 'PackagingSignalDeletion': '5DEFECT', + 'PackagingSignalNotComplete': '5DEFECT', + 'MajorSpliceDonorSiteMutated': '5DEFECT', 'WrongORFNumber': 'PrematureStop_OR_AAtooLong_OR_AAtooShort', } @@ -405,19 +407,19 @@ def translate_hivintact_error(error): HIVINTACT_ERRORS_TABLE = [ 'NonHIV', 'LongDeletion', - 'Scramble', 'InternalInversion', + 'Scramble', 'APOBECHypermutationDetected', + 'MajorSpliceDonorSiteMutated', + 'PackagingSignalDeletion', + 'PackagingSignalNotComplete', + 'RevResponseElementDeletion', 'MisplacedORF', 'WrongORFNumber', 'DeletionInOrf', 'InsertionInOrf', 'InternalStopInOrf', 'FrameshiftInOrf', - 'MajorSpliceDonorSiteMutated', - 'PackagingSignalDeletion', - 'PackagingSignalNotComplete', - 'RevResponseElementDeletion', ] def iterate_hivintact_data(name, outpath): From 6ad62bd35d5cd738aa95ca173162acd75cbb8655 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 21 Jul 2023 14:26:37 -0700 Subject: [PATCH 11/47] Bump HIVIntact from cfe-1.2 to cfe-1.3 --- Pipfile | 2 +- Pipfile.lock | 8 ++++---- Singularity | 2 +- setup.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Pipfile b/Pipfile index a9d403d..80785b2 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -intactness-pipeline = {ref = "cfe-1.2", git = "https://github.com/cfe-lab/HIVIntact"} +intactness-pipeline = {ref = "cfe-1.3", git = "https://github.com/cfe-lab/HIVIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index aed6c1c..840919c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "100f2cea5bdbda87105a66b042daed34e24cc3e092e8382794b6f03c5553b02d" + "sha256": "c5e8c3f18599c2490183a8f39736737ebf0b47821cc0908a613f2e95e8ee8684" }, "pipfile-spec": 6, "requires": { @@ -165,7 +165,7 @@ }, "intactness-pipeline": { "git": "https://github.com/cfe-lab/HIVIntact", - "ref": "14f571d873131d2521a6bddcefbf795914ac78c7" + "ref": "2bb83a07b42e78a08c0f40a971d06f9393795717" }, "jarowinkler": { "hashes": [ @@ -342,7 +342,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.8.2" }, "python-levenshtein": { @@ -451,7 +451,7 @@ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==1.16.0" }, "tzdata": { diff --git a/Singularity b/Singularity index 5f18945..83379d4 100644 --- a/Singularity +++ b/Singularity @@ -38,7 +38,7 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null - pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.2 + pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.3 cd /opt/primer_finder pip3 install . diff --git a/setup.py b/setup.py index 8fc0cd8..42a0f7d 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.2', + 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.3', 'pyyaml' ], package_data={ From 7e15cf09e22e31f8b265ef09a8018f1dda7ec83e Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 21 Jul 2023 14:31:36 -0700 Subject: [PATCH 12/47] Generate proviral_landscape for HIVIntact runs too --- gene_splicer/sample.py | 2 +- gene_splicer/utils.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gene_splicer/sample.py b/gene_splicer/sample.py index 858782b..8048830 100644 --- a/gene_splicer/sample.py +++ b/gene_splicer/sample.py @@ -115,7 +115,7 @@ def main(): for file in fasta_files: gene_splicer.run(file, outdir=outpath) utils.generate_table_precursor(name=run_name, outpath=outpath) - utils.generate_proviral_landscape_csv(outpath) + utils.generate_proviral_landscape_csv(outpath, is_hivintact=args.hivintact) copy_output(outpath / 'outcome_summary.csv', args.outcome_summary_csv) copy_output(outpath / (run_name + '_conseqs_primer_analysis.csv'), args.conseqs_primers_csv) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index b5f65b6..3f0ff15 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -558,12 +558,14 @@ def generate_table_precursor_2(hivseqinr_resultsfile, filtered_file, return table_precursorfile -def generate_proviral_landscape_csv(outpath): +def generate_proviral_landscape_csv(outpath, is_hivintact): proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv') landscape_rows = [] table_precursor_csv = os.path.join(outpath, 'table_precursor.csv') blastn_csv = glob.glob( + os.path.join(outpath, 'hivintact*', 'blast.tsv') \ + if is_hivintact else \ os.path.join( outpath, 'hivseqinr*', From 719a14bc86c60819f54a605d5efc4a8d12763dfb Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 21 Jul 2023 14:01:52 -0700 Subject: [PATCH 13/47] Singularity: make HIVIntact the default and the only app --- Singularity | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/Singularity b/Singularity index 83379d4..b1bbae6 100644 --- a/Singularity +++ b/Singularity @@ -3,7 +3,7 @@ Bootstrap: docker From: ubuntu:22.04 %help - Search proviral consensus sequences for primers, then use HIVSeqinR to + Search proviral consensus sequences for primers, then use HIVIntact to decide if the genomes are complete. This Singularity container can be run on Kive: http://cfe-lab.github.io/Kive @@ -46,12 +46,6 @@ From: ubuntu:22.04 echo ===== Installing minimap2 ===== >/dev/null apt-get install -y minimap2 - echo ===== Installing hivseqinr ===== >/dev/null - apt-get install -y libz-dev libcurl4-openssl-dev libxml2-dev - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y r-base - Rscript /opt/primer_finder/gene_splicer/configure_r.sh - python3 -m gene_splicer.hivseqinr /opt/hivseqinr - # Clean up apt-get remove -y wget git build-essential @@ -59,18 +53,4 @@ From: ubuntu:22.04 export LANG=en_US.UTF-8 %runscript - gene_splicer_sample --hivseqinr /opt/hivseqinr "$@" - -%apprun hivintact gene_splicer_sample --hivintact "$@" - -%apphelp hivintact - Search proviral consensus sequences for primers, then use HIVIntact to - decide if the genomes are complete. - -%applabels hivintact - KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv - KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \ - table_precursor_csv hivseqinr_results_tar - KIVE_THREADS 1 - KIVE_MEMORY 200 From a1294dfa65823c91d1c414e38d75c45b432f95f4 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 21 Jul 2023 15:16:07 -0700 Subject: [PATCH 14/47] Switch to csv outputs in HIVIntact --- gene_splicer/primer_finder.py | 1 + gene_splicer/utils.py | 36 +++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index 6a631cb..40a15f4 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -544,6 +544,7 @@ def run(contigs_csv, '--check-scramble', '--check-internal-inversion', '--include-small-orfs', + '--output-csv', str(no_primers_fasta)], check=True, stdout=log_file, diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 3f0ff15..d4fee4f 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -424,22 +424,38 @@ def translate_hivintact_error(error): def iterate_hivintact_data(name, outpath): intact = {} + + def get_verdict(SEQID, all_errors): + ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) + verdict = translate_hivintact_error(ordered[0]) + return [SEQID, verdict] + for d in glob.glob(str(outpath / 'hivintact*')): for (SEQID, sequence) in read_fasta(os.path.join(d, 'intact.fasta')): row = [SEQID, 'Intact'] intact[SEQID] = True yield row - with open(os.path.join(d, 'errors.json'), 'r') as f: - js = json.load(f) - for SEQID in js: - if SEQID in intact: continue - all_errors = [obj.get('error') for obj in js[SEQID] if 'error' in obj] - if all_errors: - ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) - verdict = translate_hivintact_error(ordered[0]) - row = [SEQID, verdict] - yield row + sequence_name = None + with open(os.path.join(d, 'errors.csv'), 'r') as f: + reader = csv.DictReader(f) + + last_name = None + all_errors = [] + for row in reader: + sequence_name = row['sequence_name'] + if sequence_name in intact: continue + + if last_name != sequence_name and last_name is not None: + if all_errors: + yield get_verdict(sequence_name, all_errors) + all_errors = [] + + all_errors.append(row['error']) + last_name = sequence_name + + if all_errors: + yield get_verdict(sequence_name, all_errors) def get_hivintact_data(name, outpath): column_names = ['SEQID', 'MyVerdict'] From 5c378e9e06c08e55867daadd0118292dcca0d02d Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 21 Jul 2023 15:33:31 -0700 Subject: [PATCH 15/47] Rename "hivseqinr_results_tar" argument to "detailed_results_tar" --- Singularity | 2 +- gene_splicer/sample.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Singularity b/Singularity index b1bbae6..66611f3 100644 --- a/Singularity +++ b/Singularity @@ -13,7 +13,7 @@ From: ubuntu:22.04 MAINTAINER BC CfE in HIV/AIDS https://github.com/cfe-lab/ KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \ - table_precursor_csv proviral_landscape_csv hivseqinr_results_tar + table_precursor_csv proviral_landscape_csv detailed_results_tar KIVE_THREADS 1 KIVE_MEMORY 6000 diff --git a/gene_splicer/sample.py b/gene_splicer/sample.py index 8048830..076ce3c 100644 --- a/gene_splicer/sample.py +++ b/gene_splicer/sample.py @@ -43,7 +43,7 @@ def parse_args(): parser.add_argument('proviral_landscape_csv', help='Data for proviral landscape plot', type=FileType('w')) - parser.add_argument('hivseqinr_results_tar', + parser.add_argument('detailed_results_tar', help="Archive file with HIVSeqinR's final results " "folder, or HIVIntact's results.", type=FileType('wb')) @@ -73,7 +73,7 @@ def parse_args(): help='To avoid memory issues in hivseqinr, split the resulting ' 'qc-passed sequences into this number of fastas, each will be ' 'processed sequentially and then all will be merged into the ' - 'final result') + 'final result. Obsolete for HIVIntact.') return parser.parse_args() @@ -94,9 +94,9 @@ def main(): run_name = sample_info.get('run_name', 'kive_run') if args.hivintact: hivseqinr_results_tar = None - hivintact_results_tar = args.hivseqinr_results_tar + hivintact_results_tar = args.detailed_results_tar else: - hivseqinr_results_tar = args.hivseqinr_results_tar + hivseqinr_results_tar = args.detailed_results_tar hivintact_results_tar = None fasta_files = primer_finder.run(contigs_csv=args.contigs_csv, conseqs_csv=args.conseqs_csv, From 56d06d90e8590789ea718446ff11835dafc3d8f6 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 19 Sep 2023 14:40:05 -0700 Subject: [PATCH 16/47] Bump HIVIntact from 1.3 to 1.4 --- Pipfile | 2 +- Pipfile.lock | 224 ++++++++++++++++++++++-------------------- Singularity | 2 +- gene_splicer/utils.py | 42 ++++---- setup.py | 2 +- 5 files changed, 142 insertions(+), 130 deletions(-) diff --git a/Pipfile b/Pipfile index 80785b2..bd1ed10 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -intactness-pipeline = {ref = "cfe-1.3", git = "https://github.com/cfe-lab/HIVIntact"} +intactness-pipeline = {ref = "cfe-1.4", git = "https://github.com/cfe-lab/HIVIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 840919c..05f6d7c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "c5e8c3f18599c2490183a8f39736737ebf0b47821cc0908a613f2e95e8ee8684" + "sha256": "089fff873ef627ba4da8943f369d35baf072b48b4421d4d025abb17d951ffce8" }, "pipfile-spec": 6, "requires": { @@ -55,11 +55,11 @@ }, "certifi": { "hashes": [ - "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", - "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716" + "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", + "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" ], "markers": "python_version >= '3.6'", - "version": "==2023.5.7" + "version": "==2023.7.22" }, "charset-normalizer": { "hashes": [ @@ -144,11 +144,11 @@ }, "click": { "hashes": [ - "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd", - "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5" + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" ], "markers": "python_version >= '3.7'", - "version": "==8.1.6" + "version": "==8.1.7" }, "gotoh": { "git": "https://github.com/cfe-lab/MiCall.git", @@ -165,7 +165,7 @@ }, "intactness-pipeline": { "git": "https://github.com/cfe-lab/HIVIntact", - "ref": "2bb83a07b42e78a08c0f40a971d06f9393795717" + "ref": "739478485deef5fecae47e89d2baaaa16b763aef" }, "jarowinkler": { "hashes": [ @@ -342,7 +342,7 @@ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "python-levenshtein": { @@ -354,14 +354,16 @@ }, "pytz": { "hashes": [ - "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588", - "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb" + "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b", + "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7" ], - "version": "==2023.3" + "version": "==2023.3.post1" }, "pyyaml": { "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", @@ -369,7 +371,10 @@ "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", @@ -377,9 +382,12 @@ "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", @@ -394,7 +402,9 @@ "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", + "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", @@ -415,43 +425,49 @@ }, "scipy": { "hashes": [ - "sha256:08d957ca82d3535b3b9ba6c8ff355d78fe975271874e2af267cb5add5bd78625", - "sha256:249cfa465c379c9bb2c20123001e151ff5e29b351cbb7f9c91587260602c58d0", - "sha256:366a6a937110d80dca4f63b3f5b00cc89d36f678b2d124a01067b154e692bab1", - "sha256:39154437654260a52871dfde852adf1b93b1d1bc5dc0ffa70068f16ec0be2624", - "sha256:396fae3f8c12ad14c5f3eb40499fd06a6fef8393a6baa352a652ecd51e74e029", - "sha256:3b9963798df1d8a52db41a6fc0e6fa65b1c60e85d73da27ae8bb754de4792481", - "sha256:3e8eb42db36526b130dfbc417609498a6192381abc1975b91e3eb238e0b41c1a", - "sha256:512fdc18c65f76dadaca139348e525646d440220d8d05f6d21965b8d4466bccd", - "sha256:aec8c62fbe52914f9cf28d846cf0401dd80ab80788bbab909434eb336ed07c04", - "sha256:b41a0f322b4eb51b078cb3441e950ad661ede490c3aca66edef66f4b37ab1877", - "sha256:b4bb943010203465ac81efa392e4645265077b4d9e99b66cf3ed33ae12254173", - "sha256:b588311875c58d1acd4ef17c983b9f1ab5391755a47c3d70b6bd503a45bfaf71", - "sha256:ba94eeef3c9caa4cea7b402a35bb02a5714ee1ee77eb98aca1eed4543beb0f4c", - "sha256:be8c962a821957fdde8c4044efdab7a140c13294997a407eaee777acf63cbf0c", - "sha256:cce154372f0ebe88556ed06d7b196e9c2e0c13080ecb58d0f35062dc7cc28b47", - "sha256:d51565560565a0307ed06fa0ec4c6f21ff094947d4844d6068ed04400c72d0c3", - "sha256:e866514bc2d660608447b6ba95c8900d591f2865c07cca0aa4f7ff3c4ca70f30", - "sha256:fb5b492fa035334fd249f0973cc79ecad8b09c604b42a127a677b45a9a3d4289", - "sha256:ffb28e3fa31b9c376d0fb1f74c1f13911c8c154a760312fbee87a21eb21efe31" + "sha256:0f3261f14b767b316d7137c66cc4f33a80ea05841b9c87ad83a726205b901423", + "sha256:10eb6af2f751aa3424762948e5352f707b0dece77288206f227864ddf675aca0", + "sha256:1342ca385c673208f32472830c10110a9dcd053cf0c4b7d4cd7026d0335a6c1d", + "sha256:214cdf04bbae7a54784f8431f976704ed607c4bc69ba0d5d5d6a9df84374df76", + "sha256:2b997a5369e2d30c97995dcb29d638701f8000d04df01b8e947f206e5d0ac788", + "sha256:2c91cf049ffb5575917f2a01da1da082fd24ed48120d08a6e7297dfcac771dcd", + "sha256:3aeb87661de987f8ec56fa6950863994cd427209158255a389fc5aea51fa7055", + "sha256:4447ad057d7597476f9862ecbd9285bbf13ba9d73ce25acfa4e4b11c6801b4c9", + "sha256:542a757e2a6ec409e71df3d8fd20127afbbacb1c07990cb23c5870c13953d899", + "sha256:8d9886f44ef8c9e776cb7527fb01455bf4f4a46c455c4682edc2c2cc8cd78562", + "sha256:90d3b1364e751d8214e325c371f0ee0dd38419268bf4888b2ae1040a6b266b2a", + "sha256:95763fbda1206bec41157582bea482f50eb3702c85fffcf6d24394b071c0e87a", + "sha256:ac74b1512d38718fb6a491c439aa7b3605b96b1ed3be6599c17d49d6c60fca18", + "sha256:afdb0d983f6135d50770dd979df50bf1c7f58b5b33e0eb8cf5c73c70600eae1d", + "sha256:b0620240ef445b5ddde52460e6bc3483b7c9c750275369379e5f609a1050911c", + "sha256:b133f237bd8ba73bad51bc12eb4f2d84cbec999753bf25ba58235e9fc2096d80", + "sha256:b29318a5e39bd200ca4381d80b065cdf3076c7d7281c5e36569e99273867f61d", + "sha256:b8425fa963a32936c9773ee3ce44a765d8ff67eed5f4ac81dc1e4a819a238ee9", + "sha256:d2b813bfbe8dec6a75164523de650bad41f4405d35b0fa24c2c28ae07fcefb20", + "sha256:d690e1ca993c8f7ede6d22e5637541217fc6a4d3f78b3672a6fe454dbb7eb9a7", + "sha256:e367904a0fec76433bf3fbf3e85bf60dae8e9e585ffd21898ab1085a29a04d16", + "sha256:ea932570b1c2a30edafca922345854ff2cd20d43cd9123b6dacfdecebfc1a80b", + "sha256:f28f1f6cfeb48339c192efc6275749b2a25a7e49c4d8369a28b6591da02fbc9a", + "sha256:f73102f769ee06041a3aa26b5841359b1a93cc364ce45609657751795e8f4a4a", + "sha256:fa4909c6c20c3d91480533cddbc0e7c6d849e7d9ded692918c76ce5964997898" ], "markers": "python_version < '3.13' and python_version >= '3.9'", - "version": "==1.11.1" + "version": "==1.11.2" }, "setuptools": { "hashes": [ - "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f", - "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235" + "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87", + "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a" ], - "markers": "python_version >= '3.7'", - "version": "==68.0.0" + "markers": "python_version >= '3.8'", + "version": "==68.2.2" }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "tzdata": { @@ -474,77 +490,69 @@ "develop": { "coverage": { "hashes": [ - "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f", - "sha256:06fb182e69f33f6cd1d39a6c597294cff3143554b64b9825d1dc69d18cc2fff2", - "sha256:0a5f9e1dbd7fbe30196578ca36f3fba75376fb99888c395c5880b355e2875f8a", - "sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a", - "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01", - "sha256:1e9d683426464e4a252bf70c3498756055016f99ddaec3774bf368e76bbe02b6", - "sha256:201e7389591af40950a6480bd9edfa8ed04346ff80002cec1a66cac4549c1ad7", - "sha256:245167dd26180ab4c91d5e1496a30be4cd721a5cf2abf52974f965f10f11419f", - "sha256:2aee274c46590717f38ae5e4650988d1af340fe06167546cc32fe2f58ed05b02", - "sha256:2e07b54284e381531c87f785f613b833569c14ecacdcb85d56b25c4622c16c3c", - "sha256:31563e97dae5598556600466ad9beea39fb04e0229e61c12eaa206e0aa202063", - "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a", - "sha256:3d376df58cc111dc8e21e3b6e24606b5bb5dee6024f46a5abca99124b2229ef5", - "sha256:419bfd2caae268623dd469eff96d510a920c90928b60f2073d79f8fe2bbc5959", - "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97", - "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6", - "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f", - "sha256:537891ae8ce59ef63d0123f7ac9e2ae0fc8b72c7ccbe5296fec45fd68967b6c9", - "sha256:54b896376ab563bd38453cecb813c295cf347cf5906e8b41d340b0321a5433e5", - "sha256:58c2ccc2f00ecb51253cbe5d8d7122a34590fac9646a960d1430d5b15321d95f", - "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562", - "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe", - "sha256:5e330fc79bd7207e46c7d7fd2bb4af2963f5f635703925543a70b99574b0fea9", - "sha256:61b9a528fb348373c433e8966535074b802c7a5d7f23c4f421e6c6e2f1697a6f", - "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb", - "sha256:6d040ef7c9859bb11dfeb056ff5b3872436e3b5e401817d87a31e1750b9ae2fb", - "sha256:6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1", - "sha256:7ee7d9d4822c8acc74a5e26c50604dff824710bc8de424904c0982e25c39c6cb", - "sha256:81c13a1fc7468c40f13420732805a4c38a105d89848b7c10af65a90beff25250", - "sha256:8d13c64ee2d33eccf7437961b6ea7ad8673e2be040b4f7fd4fd4d4d28d9ccb1e", - "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511", - "sha256:8fa03bce9bfbeeef9f3b160a8bed39a221d82308b4152b27d82d8daa7041fee5", - "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59", - "sha256:975d70ab7e3c80a3fe86001d8751f6778905ec723f5b110aed1e450da9d4b7f2", - "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d", - "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3", - "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4", - "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de", - "sha256:a895fcc7b15c3fc72beb43cdcbdf0ddb7d2ebc959edac9cef390b0d14f39f8a9", - "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833", - "sha256:b1c546aca0ca4d028901d825015dc8e4d56aac4b541877690eb76490f1dc8ed0", - "sha256:b29019c76039dc3c0fd815c41392a044ce555d9bcdd38b0fb60fb4cd8e475ba9", - "sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d", - "sha256:b7aa5f8a41217360e600da646004f878250a0d6738bcdc11a0a39928d7dc2050", - "sha256:b7b4c971f05e6ae490fef852c218b0e79d4e52f79ef0c8475566584a8fb3e01d", - "sha256:ba90a9563ba44a72fda2e85302c3abc71c5589cea608ca16c22b9804262aaeb6", - "sha256:cb017fd1b2603ef59e374ba2063f593abe0fc45f2ad9abdde5b4d83bd922a353", - "sha256:d22656368f0e6189e24722214ed8d66b8022db19d182927b9a248a2a8a2f67eb", - "sha256:d2c2db7fd82e9b72937969bceac4d6ca89660db0a0967614ce2481e81a0b771e", - "sha256:d39b5b4f2a66ccae8b7263ac3c8170994b65266797fb96cbbfd3fb5b23921db8", - "sha256:d62a5c7dad11015c66fbb9d881bc4caa5b12f16292f857842d9d1871595f4495", - "sha256:e7d9405291c6928619403db1d10bd07888888ec1abcbd9748fdaa971d7d661b2", - "sha256:e84606b74eb7de6ff581a7915e2dab7a28a0517fbe1c9239eb227e1354064dcd", - "sha256:eb393e5ebc85245347950143969b241d08b52b88a3dc39479822e073a1a8eb27", - "sha256:ebba1cd308ef115925421d3e6a586e655ca5a77b5bf41e02eb0e4562a111f2d1", - "sha256:ee57190f24fba796e36bb6d3aa8a8783c643d8fa9760c89f7a98ab5455fbf818", - "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4", - "sha256:f6951407391b639504e3b3be51b7ba5f3528adbf1a8ac3302b687ecababf929e", - "sha256:f75f7168ab25dd93110c8a8117a22450c19976afbc44234cbf71481094c1b850", - "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3" + "sha256:025ded371f1ca280c035d91b43252adbb04d2aea4c7105252d3cbc227f03b375", + "sha256:04312b036580ec505f2b77cbbdfb15137d5efdfade09156961f5277149f5e344", + "sha256:0575c37e207bb9b98b6cf72fdaaa18ac909fb3d153083400c2d48e2e6d28bd8e", + "sha256:07d156269718670d00a3b06db2288b48527fc5f36859425ff7cec07c6b367745", + "sha256:1f111a7d85658ea52ffad7084088277135ec5f368457275fc57f11cebb15607f", + "sha256:220eb51f5fb38dfdb7e5d54284ca4d0cd70ddac047d750111a68ab1798945194", + "sha256:229c0dd2ccf956bf5aeede7e3131ca48b65beacde2029f0361b54bf93d36f45a", + "sha256:245c5a99254e83875c7fed8b8b2536f040997a9b76ac4c1da5bff398c06e860f", + "sha256:2829c65c8faaf55b868ed7af3c7477b76b1c6ebeee99a28f59a2cb5907a45760", + "sha256:4aba512a15a3e1e4fdbfed2f5392ec221434a614cc68100ca99dcad7af29f3f8", + "sha256:4c96dd7798d83b960afc6c1feb9e5af537fc4908852ef025600374ff1a017392", + "sha256:50dd1e2dd13dbbd856ffef69196781edff26c800a74f070d3b3e3389cab2600d", + "sha256:5289490dd1c3bb86de4730a92261ae66ea8d44b79ed3cc26464f4c2cde581fbc", + "sha256:53669b79f3d599da95a0afbef039ac0fadbb236532feb042c534fbb81b1a4e40", + "sha256:553d7094cb27db58ea91332e8b5681bac107e7242c23f7629ab1316ee73c4981", + "sha256:586649ada7cf139445da386ab6f8ef00e6172f11a939fc3b2b7e7c9082052fa0", + "sha256:5ae4c6da8b3d123500f9525b50bf0168023313963e0e2e814badf9000dd6ef92", + "sha256:5b4ee7080878077af0afa7238df1b967f00dc10763f6e1b66f5cced4abebb0a3", + "sha256:5d991e13ad2ed3aced177f524e4d670f304c8233edad3210e02c465351f785a0", + "sha256:614f1f98b84eb256e4f35e726bfe5ca82349f8dfa576faabf8a49ca09e630086", + "sha256:636a8ac0b044cfeccae76a36f3b18264edcc810a76a49884b96dd744613ec0b7", + "sha256:6407424621f40205bbe6325686417e5e552f6b2dba3535dd1f90afc88a61d465", + "sha256:6bc6f3f4692d806831c136c5acad5ccedd0262aa44c087c46b7101c77e139140", + "sha256:6cb7fe1581deb67b782c153136541e20901aa312ceedaf1467dcb35255787952", + "sha256:74bb470399dc1989b535cb41f5ca7ab2af561e40def22d7e188e0a445e7639e3", + "sha256:75c8f0df9dfd8ff745bccff75867d63ef336e57cc22b2908ee725cc552689ec8", + "sha256:770f143980cc16eb601ccfd571846e89a5fe4c03b4193f2e485268f224ab602f", + "sha256:7eb0b188f30e41ddd659a529e385470aa6782f3b412f860ce22b2491c89b8593", + "sha256:7eb3cd48d54b9bd0e73026dedce44773214064be93611deab0b6a43158c3d5a0", + "sha256:87d38444efffd5b056fcc026c1e8d862191881143c3aa80bb11fcf9dca9ae204", + "sha256:8a07b692129b8a14ad7a37941a3029c291254feb7a4237f245cfae2de78de037", + "sha256:966f10df9b2b2115da87f50f6a248e313c72a668248be1b9060ce935c871f276", + "sha256:a6191b3a6ad3e09b6cfd75b45c6aeeffe7e3b0ad46b268345d159b8df8d835f9", + "sha256:aab8e9464c00da5cb9c536150b7fbcd8850d376d1151741dd0d16dfe1ba4fd26", + "sha256:ac3c5b7e75acac31e490b7851595212ed951889918d398b7afa12736c85e13ce", + "sha256:ac9ad38204887349853d7c313f53a7b1c210ce138c73859e925bc4e5d8fc18e7", + "sha256:b9c0c19f70d30219113b18fe07e372b244fb2a773d4afde29d5a2f7930765136", + "sha256:c397c70cd20f6df7d2a52283857af622d5f23300c4ca8e5bd8c7a543825baa5a", + "sha256:c6601a60318f9c3945be6ea0f2a80571f4299b6801716f8a6e4846892737ebe4", + "sha256:c6f55d38818ca9596dc9019eae19a47410d5322408140d9a0076001a3dcb938c", + "sha256:ca70466ca3a17460e8fc9cea7123c8cbef5ada4be3140a1ef8f7b63f2f37108f", + "sha256:ca833941ec701fda15414be400c3259479bfde7ae6d806b69e63b3dc423b1832", + "sha256:cd0f7429ecfd1ff597389907045ff209c8fdb5b013d38cfa7c60728cb484b6e3", + "sha256:cd694e19c031733e446c8024dedd12a00cda87e1c10bd7b8539a87963685e969", + "sha256:cdd088c00c39a27cfa5329349cc763a48761fdc785879220d54eb785c8a38520", + "sha256:de30c1aa80f30af0f6b2058a91505ea6e36d6535d437520067f525f7df123887", + "sha256:defbbb51121189722420a208957e26e49809feafca6afeef325df66c39c4fdb3", + "sha256:f09195dda68d94a53123883de75bb97b0e35f5f6f9f3aa5bf6e496da718f0cb6", + "sha256:f12d8b11a54f32688b165fd1a788c408f927b0960984b899be7e4c190ae758f1", + "sha256:f1a317fdf5c122ad642db8a97964733ab7c3cf6009e1a8ae8821089993f175ff", + "sha256:f2781fd3cabc28278dc982a352f50c81c09a1a500cc2086dc4249853ea96b981", + "sha256:f4f456590eefb6e1b3c9ea6328c1e9fa0f1006e7481179d749b3376fc793478e" ], "index": "pypi", - "version": "==7.2.7" + "version": "==7.3.1" }, "exceptiongroup": { "hashes": [ - "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5", - "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f" + "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9", + "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3" ], "markers": "python_version < '3.11'", - "version": "==1.1.2" + "version": "==1.1.3" }, "iniconfig": { "hashes": [ @@ -564,19 +572,19 @@ }, "pluggy": { "hashes": [ - "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849", - "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3" + "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12", + "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7" ], - "markers": "python_version >= '3.7'", - "version": "==1.2.0" + "markers": "python_version >= '3.8'", + "version": "==1.3.0" }, "pytest": { "hashes": [ - "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32", - "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a" + "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002", + "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069" ], "index": "pypi", - "version": "==7.4.0" + "version": "==7.4.2" }, "tomli": { "hashes": [ diff --git a/Singularity b/Singularity index 66611f3..018d4df 100644 --- a/Singularity +++ b/Singularity @@ -38,7 +38,7 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null - pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.3 + pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.4 cd /opt/primer_finder pip3 install . diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index d4fee4f..e088a31 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -580,7 +580,7 @@ def generate_proviral_landscape_csv(outpath, is_hivintact): table_precursor_csv = os.path.join(outpath, 'table_precursor.csv') blastn_csv = glob.glob( - os.path.join(outpath, 'hivintact*', 'blast.tsv') \ + os.path.join(outpath, 'hivintact*', 'blast.csv') \ if is_hivintact else \ os.path.join( outpath, @@ -590,25 +590,29 @@ def generate_proviral_landscape_csv(outpath, is_hivintact): ) )[0] - blastn_columns = ['qseqid', - 'qlen', - 'sseqid', - 'sgi', - 'slen', - 'qstart', - 'qend', - 'sstart', - 'send', - 'evalue', - 'bitscore', - 'length', - 'pident', - 'nident', - 'btop', - 'stitle', - 'sstrand'] with open(blastn_csv, 'r') as blastn_file: - blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') + if is_hivintact: + blastn_reader = DictReader(blastn_file) + else: + blastn_columns = ['qseqid', + 'qlen', + 'sseqid', + 'sgi', + 'slen', + 'qstart', + 'qend', + 'sstart', + 'send', + 'evalue', + 'bitscore', + 'length', + 'pident', + 'nident', + 'btop', + 'stitle', + 'sstrand'] + blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') + for row in blastn_reader: if row['qseqid'] in ['8E5LAV', 'HXB2']: # skip the positive control rows diff --git a/setup.py b/setup.py index 42a0f7d..299c967 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.3', + 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.4', 'pyyaml' ], package_data={ From eae7b0f32dd789d7becf721dad7c0754f183d3b1 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 19 Sep 2023 16:43:22 -0700 Subject: [PATCH 17/47] Do not translate HIVIntact errors to HIVSeqinR ones --- gene_splicer/utils.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index e088a31..19cfa0f 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -391,19 +391,6 @@ def align(target_seq, else: return alignment_path - -HIVINTACT_TRANSLATION_TABLE = { - 'APOBECHypermutationDetected': 'Hypermut', - 'LongDeletion': 'LargeDeletion', - 'PackagingSignalDeletion': '5DEFECT', - 'PackagingSignalNotComplete': '5DEFECT', - 'MajorSpliceDonorSiteMutated': '5DEFECT', - 'WrongORFNumber': 'PrematureStop_OR_AAtooLong_OR_AAtooShort', - } - -def translate_hivintact_error(error): - return HIVINTACT_TRANSLATION_TABLE.get(error, error) - HIVINTACT_ERRORS_TABLE = [ 'NonHIV', 'LongDeletion', @@ -427,7 +414,7 @@ def iterate_hivintact_data(name, outpath): def get_verdict(SEQID, all_errors): ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) - verdict = translate_hivintact_error(ordered[0]) + verdict = ordered[0] return [SEQID, verdict] for d in glob.glob(str(outpath / 'hivintact*')): From 775f10078018ee54254092b4b939a8f59ab432aa Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 19 Sep 2023 17:12:30 -0700 Subject: [PATCH 18/47] Fix table precursor construction --- gene_splicer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 19cfa0f..8a4df3e 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -435,7 +435,7 @@ def get_verdict(SEQID, all_errors): if last_name != sequence_name and last_name is not None: if all_errors: - yield get_verdict(sequence_name, all_errors) + yield get_verdict(last_name, all_errors) all_errors = [] all_errors.append(row['error']) From 120fe49aaef16ee63bf2622a5e556dec62ecba7f Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 19 Sep 2023 17:13:24 -0700 Subject: [PATCH 19/47] Add missing codes to the HIVINACT_ERRORS_TABLE --- gene_splicer/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 8a4df3e..5ec660e 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -392,6 +392,8 @@ def align(target_seq, return alignment_path HIVINTACT_ERRORS_TABLE = [ + 'AlignmentFailed', + 'InvalidCodon', 'NonHIV', 'LongDeletion', 'InternalInversion', From 53672414d50951a05b4f6af5e2a2d06bfb4e6727 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Mar 2024 18:27:21 -0800 Subject: [PATCH 20/47] fix the check for when to use HIVSeqinR results --- gene_splicer/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 5ec660e..87e6220 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -472,9 +472,13 @@ def generate_table_precursor(name, outpath, add_columns=None): filtered_path = outpath / (name + '_filtered.csv') filtered = pd.read_csv(filtered_path) # Load hivseqinr data or HIVIntact results - results = get_hivintact_data(name, outpath) - if results.empty: + + if any(outpath.glob('hivintact*')): + results = get_hivintact_data(name, outpath) + elif any(outpath.glob('hivseqinr*')): results = get_hivseqinr_data(name, outpath) + else: + raise RuntimeError("Neither HIVIntact nor HIVSeqinR directory exists.") try: # Assign new columns based on split From 95ae330a1dc7e0293adcec8e8e2aca1a1de4c1a8 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Mar 2024 22:19:32 -0800 Subject: [PATCH 21/47] Various code improvements --- gene_splicer/utils.py | 51 ++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 87e6220..a658b91 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -12,6 +12,8 @@ import glob from pathlib import Path from csv import DictWriter, DictReader +from itertools import groupby +from operator import itemgetter logger = logging.getLogger(__name__) @@ -412,39 +414,26 @@ def align(target_seq, ] def iterate_hivintact_data(name, outpath): - intact = {} + intact = set() def get_verdict(SEQID, all_errors): ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) verdict = ordered[0] return [SEQID, verdict] - for d in glob.glob(str(outpath / 'hivintact*')): + for d in outpath.glob('hivintact*'): for (SEQID, sequence) in read_fasta(os.path.join(d, 'intact.fasta')): - row = [SEQID, 'Intact'] - intact[SEQID] = True - yield row + yield [SEQID, 'Intact'] + intact.add(SEQID) - sequence_name = None with open(os.path.join(d, 'errors.csv'), 'r') as f: reader = csv.DictReader(f) + grouped = groupby(reader, key=itemgetter('sequence_name')) + for sequence_name, errors in grouped: + if sequence_name not in intact: + all_errors = [error['error'] for error in errors] + yield get_verdict(sequence_name, all_errors) - last_name = None - all_errors = [] - for row in reader: - sequence_name = row['sequence_name'] - if sequence_name in intact: continue - - if last_name != sequence_name and last_name is not None: - if all_errors: - yield get_verdict(last_name, all_errors) - all_errors = [] - - all_errors.append(row['error']) - last_name = sequence_name - - if all_errors: - yield get_verdict(sequence_name, all_errors) def get_hivintact_data(name, outpath): column_names = ['SEQID', 'MyVerdict'] @@ -572,16 +561,14 @@ def generate_proviral_landscape_csv(outpath, is_hivintact): landscape_rows = [] table_precursor_csv = os.path.join(outpath, 'table_precursor.csv') - blastn_csv = glob.glob( - os.path.join(outpath, 'hivintact*', 'blast.csv') \ - if is_hivintact else \ - os.path.join( - outpath, - 'hivseqinr*', - 'Results_Intermediate', - 'Output_Blastn_HXB2MEGA28_tabdelim.txt' - ) - )[0] + + if is_hivintact: + subpath = os.path.join(outpath, 'hivintact*', 'blast.csv') + else: + subpath = os.path.join(outpath, 'hivseqinr*', 'Results_Intermediate', 'Output_Blastn_HXB2MEGA28_tabdelim.txt') + + blastn_csvs = glob.glob(subpath) + blastn_csv = blastn_csvs[0] with open(blastn_csv, 'r') as blastn_file: if is_hivintact: From 9d725a12f15c068a5ac6512e5c1a57b9d6f95e3d Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 14:08:02 -0700 Subject: [PATCH 22/47] Switch to CFEIntact --- Pipfile | 2 +- Pipfile.lock | 4 ---- Singularity | 2 +- setup.py | 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Pipfile b/Pipfile index bd1ed10..5ebd2cf 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -intactness-pipeline = {ref = "cfe-1.4", git = "https://github.com/cfe-lab/HIVIntact"} +cfeintact = {ref = "v1.18.4", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 05f6d7c..ffff191 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -163,10 +163,6 @@ "markers": "python_version >= '3.5'", "version": "==3.4" }, - "intactness-pipeline": { - "git": "https://github.com/cfe-lab/HIVIntact", - "ref": "739478485deef5fecae47e89d2baaaa16b763aef" - }, "jarowinkler": { "hashes": [ "sha256:07df473a812772794181885fc8e9950b629809297c8a1c00e06d0376cb6f5611", diff --git a/Singularity b/Singularity index 018d4df..513dd05 100644 --- a/Singularity +++ b/Singularity @@ -38,7 +38,7 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null - pip3 install git+https://github.com/cfe-lab/HIVIntact@cfe-1.4 + pip3 install git+https://github.com/cfe-lab/CFEIntact@v1.18.4 cd /opt/primer_finder pip3 install . diff --git a/setup.py b/setup.py index 299c967..1ebaeb7 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'intactness-pipeline @ git+https://github.com/cfe-lab/HIVIntact.git@cfe-1.4', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.4', 'pyyaml' ], package_data={ From 1c5eca69d7be4587099a1ad65019b2092caeb58c Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 14:09:58 -0700 Subject: [PATCH 23/47] Update Pipfile.lock --- Pipfile.lock | 581 ++++++++++++++++++++++----------------------------- 1 file changed, 249 insertions(+), 332 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index ffff191..e378d44 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "089fff873ef627ba4da8943f369d35baf072b48b4421d4d025abb17d951ffce8" + "sha256": "6a9e94b9fb771cdbc70c29e68954211f6778ff1be83c9713a16fe72315bc5a16" }, "pipfile-spec": 6, "requires": { @@ -16,131 +16,148 @@ ] }, "default": { - "appdirs": { - "hashes": [ - "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", - "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" - ], - "version": "==1.4.4" + "aligntools": { + "git": "git+https://github.com/cfe-lab/aligntools.git", + "ref": "94c65f42b8b2008de4259c837eda4e9378636cb3" }, "biopython": { "hashes": [ - "sha256:11d673698b3d0d6589292ea951fb62cb24ea27d273eca0d08dbbd956690f97f5", - "sha256:2cf38112b6d8415ad39d6a611988cd11fb5f33eb09346666a87263beba9614e0", - "sha256:2f9cfaf16d55ab80d514e7aebe5710dabe4e4ff47ede851031202e33b3249da3", - "sha256:35506e39822c52d11cf09a3951e82375ca1bb9303960b4286acf02c9a6f6c4cc", - "sha256:3b36ba1bf6395c09a365c53530c9d71f3617763fa2c1d452b3d8948368c0f1de", - "sha256:655df416936662c0c8a06a549cb25e1560e1fea5067d850f34fb714b8a3fae6c", - "sha256:65b93b513ce9dd7b2ce058720eadf42cd03f312db3409356efeb93123d1320aa", - "sha256:6ebfbce0d91796c7aef422ee9dffe8827e07e5abaa94545e006f1f20e965c80b", - "sha256:762c6c43a8486b5fcd07f136a3217b87d24755618b9ea9da1f17124ff44c2ad6", - "sha256:793c42a376cd63f62f8a088ce39b7dc6b5c55e4e9031d887c434de1595bfa4b8", - "sha256:7a168709694e10b338718c18d967edd5b56c237dc88642c22275796007a70000", - "sha256:7c5c07123ff5f44c9e6b5369df854a38afd3c0c50ef58498a0ae8f7eb799f3e8", - "sha256:811796f8d222aa3869a50e31e54ce62b69106b47cd8bb06934867c0d843297b5", - "sha256:8bb0c690c7368f255ed45236bf0f5464b476b8c083c8f634533921af78278261", - "sha256:919a2c583cabf9c96d2ae4e1245a6b0376932fb342aca302a0fc198b71ab3275", - "sha256:97cbdbed01b2512471f36c74b91658d1dfbdcbf39bc038f6ce5a41c3e60a8fc6", - "sha256:9ba33244f0eff830beaa7240065bdb5095d96fded6599b76bbb9ddab45cd2bbd", - "sha256:a51d9c1d1b4b634447535da74a644fae59bc234fbbf9001e2dc6b6fbabb98019", - "sha256:b09efcb4733c8770f25eab5fe555a96a08f5ab9e1bc36939e08ebf2ffbf3e0f1", - "sha256:b37c0d24191e5c96ca02415a5188551980c83a0d518bbc4ffe3c9a5d1fe0ee81", - "sha256:ccd729249fd5f586dd4c2a3507c2ea2456825d7e615e97c07c409c850eaf4594", - "sha256:e41b55edcfd448630e77bf4de66a7235324a8a149621499891da6bd1d5085b9a", - "sha256:ee51bb1cd7decffd24da6b76d5e01b7e2fd818ab85cf0c180226cbb5793a3abd", - "sha256:ef7c79b65b0b3f3c7dc59e20a7f8ae5758d8e852cb8b9cace590dc5617e348ba" + "sha256:0c425a39871f652598f502671aa5f6b5125475a91333a368a47f9c611ca96db1", + "sha256:25f4ef193a307e403eb532e8f060b818e2d75f65803a2b0f4e645b0cae131b4e", + "sha256:2cb8e839ab472244b6082635ad1df67c94c05df0bd02a023103ed00ea66c4d20", + "sha256:2d4ed30aebd96b4aadeb1f04adce92795c696f5bd56d1fd45517b89059918dd4", + "sha256:2dc2e77490725060330003f73b6b7d5172f8bc160f180de5877a2e899ad999d4", + "sha256:3c8beded38884abae4c74cb6ce54142da670273fd0b2919bd0f84f6e34d3056b", + "sha256:4f39b38e7625c33384954130d90f19984e91cb5af64e2fb128603359f86884ad", + "sha256:507ac3956f3107e77fee362ecb048dafb5f97cbcf110012d091418430c3227c7", + "sha256:52b6098f47d6b90fc8a5e8579b81ee50047e9108f0976e69c891ae0c4817e42d", + "sha256:60fbe6f996e8a6866a42698c17e552127d99a9aab3259d6249fbaabd0e0cc7b4", + "sha256:61765b71f84814a1eeb55ab222f43330aa7ad3e55ab91e8b444706149c67a281", + "sha256:7b69d59f9a762c5bb5f77ed03f197dad05ebd702c34d2cae7be98f1f30e04089", + "sha256:7f4c746825721ec367c2f2d6a8cda3bc6495a1e084e5b2fbab26e9467706603f", + "sha256:894ee7533cca7f5f9769e2595fbe7b0dba5018f39a2170753d101a13e7585ff4", + "sha256:89ef3967f5a88b5bb6344bef75ae83386de53fed3966d5c8c334ad885f8db08a", + "sha256:9e3f7476fd81f31e048965d7be2826f018686e870d96870f440b609002953954", + "sha256:9fbd4b67d3e71b0d716a1712ab8b4e57981c6549ba17ce5626ffa8976d170da7", + "sha256:b51ef31bfb79872a182a85b4113625e1b553c024bb1586c72ac98b479f8d8fe4", + "sha256:b64575363bb2344073c949fd69a0bf3ea015b447aa1494e4813376855b479118", + "sha256:ba58a6d76288333c5f178a426116953fa68204bd0cfc401694087dd4f96d0059", + "sha256:c792508988fc3ccf18eaae2a826c9cd97f1c27fb55bb87bdce6a101fee9f5a0c", + "sha256:ca8d6a88b9a9718074b3f5b450f9ea5adf7112a7dbaed55d82d5b623f5859a01", + "sha256:ecff2fcf5da29b600474c0bfcdbbac0f98b25e22fe60a853d0ee798c00f7396c", + "sha256:ee3566f6dc3acf20e238540daf896f0af20cff531521bf41fdf5143f73e209ae", + "sha256:f4c1c9ad7da9eaf8d8f4515bf931a7f6548a468e7ef29b8429e31aaff2d95f4c" ], - "markers": "python_version >= '3.7'", - "version": "==1.81" + "markers": "python_version >= '3.9'", + "version": "==1.84" }, "certifi": { "hashes": [ - "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", - "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b", + "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90" ], "markers": "python_version >= '3.6'", - "version": "==2023.7.22" + "version": "==2024.7.4" + }, + "cfeintact": { + "git": "https://github.com/cfe-lab/CFEIntact", + "ref": "a6ea095998a894e795c89a896a5fdc83ed9bc715" }, "charset-normalizer": { "hashes": [ - "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96", - "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c", - "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710", - "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706", - "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020", - "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252", - "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad", - "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329", - "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a", - "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f", - "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6", - "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4", - "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a", - "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46", - "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2", - "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23", - "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", - "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd", - "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982", - "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10", - "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2", - "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea", - "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09", - "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5", - "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149", - "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489", - "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9", - "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80", - "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592", - "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3", - "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6", - "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed", - "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c", - "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200", - "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", - "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e", - "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d", - "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", - "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623", - "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669", - "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3", - "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa", - "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9", - "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2", - "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f", - "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1", - "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4", - "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a", - "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8", - "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3", - "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029", - "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f", - "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959", - "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22", - "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7", - "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952", - "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346", - "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e", - "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d", - "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299", - "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd", - "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a", - "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3", - "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037", - "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94", - "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c", - "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858", - "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a", - "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449", - "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c", - "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918", - "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1", - "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c", - "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", - "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" ], "markers": "python_full_version >= '3.7.0'", - "version": "==3.2.0" + "version": "==3.3.2" }, "click": { "hashes": [ @@ -152,124 +169,17 @@ }, "gotoh": { "git": "https://github.com/cfe-lab/MiCall.git", + "markers": "python_version >= '3.8' and python_version < '4.0'", "ref": "f1687e7b7c7f1f3a6f3cb63107f1cf9b2b210f26", "subdirectory": "micall/alignment" }, "idna": { "hashes": [ - "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", - "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" ], "markers": "python_version >= '3.5'", - "version": "==3.4" - }, - "jarowinkler": { - "hashes": [ - "sha256:07df473a812772794181885fc8e9950b629809297c8a1c00e06d0376cb6f5611", - "sha256:0dd61e79babfbca37f6f4d2b81bfbc92979e5e22f02d04ba5e762d84901a95bf", - "sha256:1097b349e09c6ae2d92520ef0ab79580b6b136f6f1c1d62ad783595011f0f260", - "sha256:18da76d3a6d7a0898f36525a1ce8303fcb5413d1bcbc30c3f3634344aeecf397", - "sha256:18e3af57ac066a617bd688d62b9d0da11da32dca977d9fe5c1726040be26ad2e", - "sha256:1b243a43ef1740bdec3101243347ceb59f698f28df0c514935f4cf856af22795", - "sha256:21869871774ea4a34222538c33704234ee8e1b4c1a82fe95471215994575e631", - "sha256:21ae65449c52b14578fd28f51c2efdd976a632979054cf12e714cc86fdc1d1aa", - "sha256:28654c3cfd1f917900a44650cee3a6827210c1f1783ef5aca3399ee31ee2cd17", - "sha256:288c615134ec2d5d122fb834eb0e134f5ccd0080ce1091e2f8170d861de4c24d", - "sha256:298c708bd8609b0563846cd770891f4fc6492ea1c09ef7ac24a68731f4ede37a", - "sha256:2daa79de5856d34ee6a813d9b049d55aad7014a92ce1d90fb3e487338ec362ef", - "sha256:3af4e4aab7c6ba14f75bd74a21ee00befed67cd2221e626c5741545b4a57c60c", - "sha256:439d66dd82a452535293c2503a0930c2aacc4ebf9542f0ca52b351084e9f3e32", - "sha256:447c9b1323e7b16ff21da9121164b54c4a806f352f716b2a6e1f937acabc6e73", - "sha256:46e042d75ee91e1fe678ad0bdb6eea4d6d052f6e6ee35adac8bf5d01942e1f6a", - "sha256:49796215bd66bb87d2d88da7131b785330b3b2e50cbd7a7be75b4964512f5aa9", - "sha256:4a2297958acccf63da521f1e7d1c17e3f074db6bf6d4d9eb8c888e638fff2feb", - "sha256:4ea218d666041f41434957816e0a52e8533e7e191c8302ca062ebfa4ec42220d", - "sha256:4f91e8ee2b81c44d8f4aec164e84a976fcabe754fe107efae3eae2e9fb433ffd", - "sha256:4feec944743bdcd099b8967d16802c78f1009f3222a241b3d7424795ad301c54", - "sha256:4ffe4a84dba6a7cc9411f5185677a7fa86087d3a036281f837eec7a1afd93a34", - "sha256:50bac973e0aec697d73bf6b601e027e6079779fb9f6b0905eaefb055536bec39", - "sha256:51eeb42de858363e93c3407568b3fec1919b99a5ffb6d5c4e3dc494a12d37241", - "sha256:532c89ab12246f36338500b7c7c36b87389e01fca93eec74680423e5e5678677", - "sha256:53b9f9ca5cd56c82500171abc4818ef9f756e77e995ac57046f598fba2642f78", - "sha256:5586164f7063fd9d1704ba136041f5811d847e994dabb973ce4741f8d512a586", - "sha256:5719f55bbc84ff08e8ef8d6a87ee936dfa2d29554ae2fc2888214a336c660cbd", - "sha256:5aa5645ed7b77ebfa18f9cf7276dfe532d00d64c551fdbdf086c1583a40a5079", - "sha256:5b8a0e1476e7a0cf316e32acd02733f6dba38a19e57c8aa58dad8cbb69627b54", - "sha256:5e83b734568aeabf71a89b8f9a7b9630eca71de68e74701d306d56f9e8621c3f", - "sha256:5e954870ba9e8ad3ffdda976a71379b9cc8474195caa3009d89dda350cf5d0fe", - "sha256:66aa6d4e961d956da7508d9bf837686e2b957db14a19dbfb0aefe259f9c6a177", - "sha256:68a77b3f262fa90ec30563a50835c760f7417a2cf55138a77606f2def1a4d8b3", - "sha256:6e49f8c2258bdab01fef9dd8111811de8ec000a7b6f5a12283f2322ce5f473e3", - "sha256:6ea3f8e772debaf85ecf9b0aa07f9fd8de3bfaf52595edaa86c979309658afdc", - "sha256:71a41f8d34bb315ea245a9c78e1dc40e58e560ba699ae34932f397271eaa830e", - "sha256:7253c25288294474d98e269dd73d7e8d9f503655c77180201788c6f29848bb4e", - "sha256:786db72036f9b43aa6e4848584580ff8d0a33816f67050cc1d17f283a9446002", - "sha256:7986c585540262e2abe3badda0e4982291f6513bd3cd313447b0faf77fae454f", - "sha256:7dcfe9a47ec5e1c544add253660475fe44b771b0cc1b5d959ca9bdad8f778e65", - "sha256:7f964945c52bb21058718f1e074a14d231bff1dad83c8e8bd1607ed6add4b0fa", - "sha256:813f626b8f2703275e7ad18b842cedc1e6d06e4a334337f96b5a91afcda78ed2", - "sha256:8493cb25ae8627272537f40b6fdfb376824e38d1e8e7e48196e49494bbdc78f3", - "sha256:8b77e53d9a1a8aa84f6c3817790d0fa336a42f726277d9e5a0cf2420337349ee", - "sha256:8e85bb480eee04681d7f99ce95e86ec8d9182204737a3d141f5a97216d164d6c", - "sha256:8ed3635427c04c8680807ecf6b71014c145ae760c22243f8ff6dd1a8cc7fa695", - "sha256:91ccb6b51cff6158a7f699e0912ab243b7f0026d63919a7696214303e709a21e", - "sha256:93cb99fc11b44db61631eea23294f6ae66e944d27129b2856e52f66f11eb8082", - "sha256:967a10aed9fca73b826ab41d859ac6a35021ac39efeea5991070964db10a9b13", - "sha256:97175ef8bf47e796280c899c8d72788313e277a30cd5c4a549bbab60ce70e5f5", - "sha256:9b2c89b9893c2c0fb1c7369160e2a08258415df5345019dd61c3e15c2ca74b65", - "sha256:9bd54d4635bc9d01510fff1545b4ec1e26bddafde0aff6af1af4e46b80407e9e", - "sha256:9f968112e0b8be55b259e041be1f9f294931c8790f014c5c04f7c1ffe7928b78", - "sha256:a03972d2878e6954852ffce67a843de8a30c515eaa257313b609151e16036bab", - "sha256:a3f03c8178b94380e103c9368e84b88bfca437e59e484dc71d8b059d43c6e8dc", - "sha256:a71063e01863f561d86459929ad7c5f6c389922aada4170b67ab7c266e6cf96c", - "sha256:aa12fa8a788804fca8fde0f24c14015f3adf18b2336adb66526e326c15b59c72", - "sha256:ae3b951ff925a5c1fc7746845d796ce34891313813f6c3bc2d057759c8090c47", - "sha256:aea994d5673e9c3b49d548b58f961448bd8a2ba40d3244c1809c891ed29daa02", - "sha256:af28ea284cfbd1b21b29ff94b759f20e94e4f7c06f424b0b4702e701c2a21668", - "sha256:b24b58bd62de20cc773b0b55352d0a43d6cf2beb9b0a21bbaf5ca1f6f50d3d44", - "sha256:b29a7adb25bf02f1e007fec412a67a5c3c8de1ba062454de539e623eb638fcaf", - "sha256:b39b7cdb985f6c040830f047cd98a0563bbfa909944130223c23667432b39c73", - "sha256:b56248ab6e734b40309b6337b0de5cb37e7f0e71d64c7f5f0d58bcb46c05699d", - "sha256:b59b4e82ade4b9b147646189b500f2085e06c8c7746dd6311e03bc4d4ad126e8", - "sha256:b696f0f80df13e8e86958a9d0eaa9218a6a311b5c566f6a081ef17d7d594713b", - "sha256:b73bba435e9cd7618130907d753c708c84baddec5ee6e2637f9630f02496b189", - "sha256:b91d6fe7fd12c5d3bb82b644500df13ee0f7ae949f067e6d967be896aa340732", - "sha256:b959d3fcf4ffe865ee518328d77d137ea7b6ad0c8f1f8b96b7a08cd97d3a9c87", - "sha256:bbb94c0d894cde960b264f3f797c99cbe316e0280ea1b81e240d6ee4ec19fa0c", - "sha256:bc0dcc31ce493aa70067e1f7ed2cb1528b8bd86bb276f25b6c09fabf746b3df7", - "sha256:bc8182ba6561a19eddbbd88106b986b93ae11205919cea36385a260d2146c638", - "sha256:c0eac3a71193575002e2c374ff7be5ef4005e9370c29dad83e2537f57d09e07e", - "sha256:c1f3e5d5137419a608a878b76ec277c1618119259134ef94e323d5e7fdc2acfd", - "sha256:c834dd86fc4f372c0cd6ec7a33432e49e644de7b5d37f520b96500cab7e9d992", - "sha256:cba8a789610e97d29c850370c6c8f68c0481355446a356bfa0b2703d8afb8436", - "sha256:d2cf546b18f9d49d25f33dd564c06fbe29c0e3090d062bad84ba04e77fc7d168", - "sha256:d2debc08e15e6c16999c27c1afc4c2493c0d3f140206d24970872f4619ea840a", - "sha256:d4638b6b4569e418365aa12d8175025b93336bb074288ec8b9b259734da9990e", - "sha256:d5399e513b58496483eeda61ff180676fc6ff9c3b6ef53af3c53be0777e71247", - "sha256:d6648d9c68bcc79f80092fa00e9f897df12b9826f05b7211260b494742ae3e12", - "sha256:d8da4660934bbf3958e6bd0165ab088d6e65ef6cee0c52d82e86d424ca1be96f", - "sha256:da09cbcbb917d99fb341730dbb7892b7a642ef0ca371c7f3a647b4dae6770190", - "sha256:db49e56139da097b5d85f323b1ed906a5d9d6d3a4336ce694910852d0a4cd607", - "sha256:dd1cd8a99f7f3347d3b30941460531a0ee8b855f199a3b56ac6d49aa98266600", - "sha256:e2d2c6341b021b146db418c77ee71d4318013074761681aba42c1d332a723f7d", - "sha256:e36a5af0db07010e3cc70000edebc2cdb92c39beb2d10d721604a7a52c48100d", - "sha256:e3edd98b7fa078b06b1bd0e12d7e244c875e7030ad242eb31719f2f87e343862", - "sha256:e40905895ddbce8cbdc5f079299371630e771db3c0e7820b2d262c4bb6a8bea0", - "sha256:e480e39ed2420a881ac445f6fea8064c36f535970deb4ee94677afe06985b917", - "sha256:e68cb387d79871b45d20a670bdd33b0f9edb08ed85aa7a5eb19dafdecfa1c091", - "sha256:e8c6dba59166803347c96f48e1af608f8bbc8efe9d545e1a3f9bfb526e76fd62", - "sha256:eaa520f9b4e6e955269a117d7481332b06aff3fb04981fd218294793ba4ae5fc", - "sha256:ef39035486ce07745a0fee9dd80bd9a0b692811111da4ef9aedbc0ddd23ff9cd", - "sha256:f2afe56a6cb3e84cc77af6e4a1e8eb6f4f6211a8dd0468237aeee27e16501752", - "sha256:f8dd58576c81e8115ca29dc757feb413fd689d194789670a533384997306385e", - "sha256:fa1db18ba0a0fe383e9396e2db91d31fcabfc0ff03fa599b5a10edc57416084a", - "sha256:fa2d76d3572229ad282dd7ed0005387e9085bdfd954a7636a6f920530e3b670d", - "sha256:fa55d91bcf097b464df6efa92762434aa3026a9774ab2509895a1948bf64b121", - "sha256:fbcb4aafadf3ac758de12deb3c90c4e4b6497a104d00ecc8cb6585757af3ab90", - "sha256:fd4b9111a2092eaaaabd7dd33aa8703d734075a2f75ec87976eab0a2b60273ac" - ], - "markers": "python_version >= '3.6'", - "version": "==1.2.3" + "version": "==3.7" }, "numpy": { "hashes": [ @@ -300,6 +210,7 @@ "sha256:fd67b306320dcadea700a8f79b9e671e607f8696e98ec255915c0c6d6b818503" ], "index": "pypi", + "markers": "python_version >= '3.9'", "version": "==1.25.1" }, "pandas": { @@ -331,15 +242,16 @@ "sha256:f908a77cbeef9bbd646bd4b81214cbef9ac3dda4181d5092a4aa9797d1bc7774" ], "index": "pypi", + "markers": "python_version >= '3.8'", "version": "==2.0.2" }, "python-dateutil": { "hashes": [ - "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", - "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.8.2" + "version": "==2.9.0.post0" }, "python-levenshtein": { "hashes": [ @@ -350,10 +262,10 @@ }, "pytz": { "hashes": [ - "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b", - "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7" + "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", + "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319" ], - "version": "==2023.3.post1" + "version": "==2024.1" }, "pyyaml": { "hashes": [ @@ -386,6 +298,7 @@ "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", @@ -409,6 +322,7 @@ "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" ], "index": "pypi", + "markers": "python_version >= '3.6'", "version": "==6.0.1" }, "requests": { @@ -417,46 +331,47 @@ "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==2.31.0" }, "scipy": { "hashes": [ - "sha256:0f3261f14b767b316d7137c66cc4f33a80ea05841b9c87ad83a726205b901423", - "sha256:10eb6af2f751aa3424762948e5352f707b0dece77288206f227864ddf675aca0", - "sha256:1342ca385c673208f32472830c10110a9dcd053cf0c4b7d4cd7026d0335a6c1d", - "sha256:214cdf04bbae7a54784f8431f976704ed607c4bc69ba0d5d5d6a9df84374df76", - "sha256:2b997a5369e2d30c97995dcb29d638701f8000d04df01b8e947f206e5d0ac788", - "sha256:2c91cf049ffb5575917f2a01da1da082fd24ed48120d08a6e7297dfcac771dcd", - "sha256:3aeb87661de987f8ec56fa6950863994cd427209158255a389fc5aea51fa7055", - "sha256:4447ad057d7597476f9862ecbd9285bbf13ba9d73ce25acfa4e4b11c6801b4c9", - "sha256:542a757e2a6ec409e71df3d8fd20127afbbacb1c07990cb23c5870c13953d899", - "sha256:8d9886f44ef8c9e776cb7527fb01455bf4f4a46c455c4682edc2c2cc8cd78562", - "sha256:90d3b1364e751d8214e325c371f0ee0dd38419268bf4888b2ae1040a6b266b2a", - "sha256:95763fbda1206bec41157582bea482f50eb3702c85fffcf6d24394b071c0e87a", - "sha256:ac74b1512d38718fb6a491c439aa7b3605b96b1ed3be6599c17d49d6c60fca18", - "sha256:afdb0d983f6135d50770dd979df50bf1c7f58b5b33e0eb8cf5c73c70600eae1d", - "sha256:b0620240ef445b5ddde52460e6bc3483b7c9c750275369379e5f609a1050911c", - "sha256:b133f237bd8ba73bad51bc12eb4f2d84cbec999753bf25ba58235e9fc2096d80", - "sha256:b29318a5e39bd200ca4381d80b065cdf3076c7d7281c5e36569e99273867f61d", - "sha256:b8425fa963a32936c9773ee3ce44a765d8ff67eed5f4ac81dc1e4a819a238ee9", - "sha256:d2b813bfbe8dec6a75164523de650bad41f4405d35b0fa24c2c28ae07fcefb20", - "sha256:d690e1ca993c8f7ede6d22e5637541217fc6a4d3f78b3672a6fe454dbb7eb9a7", - "sha256:e367904a0fec76433bf3fbf3e85bf60dae8e9e585ffd21898ab1085a29a04d16", - "sha256:ea932570b1c2a30edafca922345854ff2cd20d43cd9123b6dacfdecebfc1a80b", - "sha256:f28f1f6cfeb48339c192efc6275749b2a25a7e49c4d8369a28b6591da02fbc9a", - "sha256:f73102f769ee06041a3aa26b5841359b1a93cc364ce45609657751795e8f4a4a", - "sha256:fa4909c6c20c3d91480533cddbc0e7c6d849e7d9ded692918c76ce5964997898" + "sha256:076c27284c768b84a45dcf2e914d4000aac537da74236a0d45d82c6fa4b7b3c0", + "sha256:07e179dc0205a50721022344fb85074f772eadbda1e1b3eecdc483f8033709b7", + "sha256:176c6f0d0470a32f1b2efaf40c3d37a24876cebf447498a4cefb947a79c21e9d", + "sha256:42470ea0195336df319741e230626b6225a740fd9dce9642ca13e98f667047c0", + "sha256:4c4161597c75043f7154238ef419c29a64ac4a7c889d588ea77690ac4d0d9b20", + "sha256:5b083c8940028bb7e0b4172acafda6df762da1927b9091f9611b0bcd8676f2bc", + "sha256:64b2ff514a98cf2bb734a9f90d32dc89dc6ad4a4a36a312cd0d6327170339eb0", + "sha256:65df4da3c12a2bb9ad52b86b4dcf46813e869afb006e58be0f516bc370165159", + "sha256:687af0a35462402dd851726295c1a5ae5f987bd6e9026f52e9505994e2f84ef6", + "sha256:6a9c9a9b226d9a21e0a208bdb024c3982932e43811b62d202aaf1bb59af264b1", + "sha256:6d056a8709ccda6cf36cdd2eac597d13bc03dba38360f418560a93050c76a16e", + "sha256:7d3da42fbbbb860211a811782504f38ae7aaec9de8764a9bef6b262de7a2b50f", + "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484", + "sha256:94c164a9e2498e68308e6e148646e486d979f7fcdb8b4cf34b5441894bdb9caf", + "sha256:9e3154691b9f7ed73778d746da2df67a19d046a6c8087c8b385bc4cdb2cfca74", + "sha256:9eee2989868e274aae26125345584254d97c56194c072ed96cb433f32f692ed8", + "sha256:a01cc03bcdc777c9da3cfdcc74b5a75caffb48a6c39c8450a9a05f82c4250a14", + "sha256:a7d46c3e0aea5c064e734c3eac5cf9eb1f8c4ceee756262f2c7327c4c2691c86", + "sha256:ad36af9626d27a4326c8e884917b7ec321d8a1841cd6dacc67d2a9e90c2f0359", + "sha256:b5923f48cb840380f9854339176ef21763118a7300a88203ccd0bdd26e58527b", + "sha256:bbc0471b5f22c11c389075d091d3885693fd3f5e9a54ce051b46308bc787e5d4", + "sha256:bff2438ea1330e06e53c424893ec0072640dac00f29c6a43a575cbae4c99b2b9", + "sha256:c40003d880f39c11c1edbae8144e3813904b10514cd3d3d00c277ae996488cdb", + "sha256:d91db2c41dd6c20646af280355d41dfa1ec7eead235642178bd57635a3f82209", + "sha256:f0a50da861a7ec4573b7c716b2ebdcdf142b66b756a0d392c236ae568b3a93fb" ], - "markers": "python_version < '3.13' and python_version >= '3.9'", - "version": "==1.11.2" + "markers": "python_version >= '3.10'", + "version": "==1.14.0" }, "setuptools": { "hashes": [ - "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87", - "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a" + "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", + "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" ], "markers": "python_version >= '3.8'", - "version": "==68.2.2" + "version": "==70.2.0" }, "six": { "hashes": [ @@ -468,87 +383,88 @@ }, "tzdata": { "hashes": [ - "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a", - "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda" + "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd", + "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252" ], "markers": "python_version >= '2'", - "version": "==2023.3" + "version": "==2024.1" }, "urllib3": { "hashes": [ - "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11", - "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4" + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" ], - "markers": "python_version >= '3.7'", - "version": "==2.0.4" + "markers": "python_version >= '3.8'", + "version": "==2.2.2" } }, "develop": { "coverage": { "hashes": [ - "sha256:025ded371f1ca280c035d91b43252adbb04d2aea4c7105252d3cbc227f03b375", - "sha256:04312b036580ec505f2b77cbbdfb15137d5efdfade09156961f5277149f5e344", - "sha256:0575c37e207bb9b98b6cf72fdaaa18ac909fb3d153083400c2d48e2e6d28bd8e", - "sha256:07d156269718670d00a3b06db2288b48527fc5f36859425ff7cec07c6b367745", - "sha256:1f111a7d85658ea52ffad7084088277135ec5f368457275fc57f11cebb15607f", - "sha256:220eb51f5fb38dfdb7e5d54284ca4d0cd70ddac047d750111a68ab1798945194", - "sha256:229c0dd2ccf956bf5aeede7e3131ca48b65beacde2029f0361b54bf93d36f45a", - "sha256:245c5a99254e83875c7fed8b8b2536f040997a9b76ac4c1da5bff398c06e860f", - "sha256:2829c65c8faaf55b868ed7af3c7477b76b1c6ebeee99a28f59a2cb5907a45760", - "sha256:4aba512a15a3e1e4fdbfed2f5392ec221434a614cc68100ca99dcad7af29f3f8", - "sha256:4c96dd7798d83b960afc6c1feb9e5af537fc4908852ef025600374ff1a017392", - "sha256:50dd1e2dd13dbbd856ffef69196781edff26c800a74f070d3b3e3389cab2600d", - "sha256:5289490dd1c3bb86de4730a92261ae66ea8d44b79ed3cc26464f4c2cde581fbc", - "sha256:53669b79f3d599da95a0afbef039ac0fadbb236532feb042c534fbb81b1a4e40", - "sha256:553d7094cb27db58ea91332e8b5681bac107e7242c23f7629ab1316ee73c4981", - "sha256:586649ada7cf139445da386ab6f8ef00e6172f11a939fc3b2b7e7c9082052fa0", - "sha256:5ae4c6da8b3d123500f9525b50bf0168023313963e0e2e814badf9000dd6ef92", - "sha256:5b4ee7080878077af0afa7238df1b967f00dc10763f6e1b66f5cced4abebb0a3", - "sha256:5d991e13ad2ed3aced177f524e4d670f304c8233edad3210e02c465351f785a0", - "sha256:614f1f98b84eb256e4f35e726bfe5ca82349f8dfa576faabf8a49ca09e630086", - "sha256:636a8ac0b044cfeccae76a36f3b18264edcc810a76a49884b96dd744613ec0b7", - "sha256:6407424621f40205bbe6325686417e5e552f6b2dba3535dd1f90afc88a61d465", - "sha256:6bc6f3f4692d806831c136c5acad5ccedd0262aa44c087c46b7101c77e139140", - "sha256:6cb7fe1581deb67b782c153136541e20901aa312ceedaf1467dcb35255787952", - "sha256:74bb470399dc1989b535cb41f5ca7ab2af561e40def22d7e188e0a445e7639e3", - "sha256:75c8f0df9dfd8ff745bccff75867d63ef336e57cc22b2908ee725cc552689ec8", - "sha256:770f143980cc16eb601ccfd571846e89a5fe4c03b4193f2e485268f224ab602f", - "sha256:7eb0b188f30e41ddd659a529e385470aa6782f3b412f860ce22b2491c89b8593", - "sha256:7eb3cd48d54b9bd0e73026dedce44773214064be93611deab0b6a43158c3d5a0", - "sha256:87d38444efffd5b056fcc026c1e8d862191881143c3aa80bb11fcf9dca9ae204", - "sha256:8a07b692129b8a14ad7a37941a3029c291254feb7a4237f245cfae2de78de037", - "sha256:966f10df9b2b2115da87f50f6a248e313c72a668248be1b9060ce935c871f276", - "sha256:a6191b3a6ad3e09b6cfd75b45c6aeeffe7e3b0ad46b268345d159b8df8d835f9", - "sha256:aab8e9464c00da5cb9c536150b7fbcd8850d376d1151741dd0d16dfe1ba4fd26", - "sha256:ac3c5b7e75acac31e490b7851595212ed951889918d398b7afa12736c85e13ce", - "sha256:ac9ad38204887349853d7c313f53a7b1c210ce138c73859e925bc4e5d8fc18e7", - "sha256:b9c0c19f70d30219113b18fe07e372b244fb2a773d4afde29d5a2f7930765136", - "sha256:c397c70cd20f6df7d2a52283857af622d5f23300c4ca8e5bd8c7a543825baa5a", - "sha256:c6601a60318f9c3945be6ea0f2a80571f4299b6801716f8a6e4846892737ebe4", - "sha256:c6f55d38818ca9596dc9019eae19a47410d5322408140d9a0076001a3dcb938c", - "sha256:ca70466ca3a17460e8fc9cea7123c8cbef5ada4be3140a1ef8f7b63f2f37108f", - "sha256:ca833941ec701fda15414be400c3259479bfde7ae6d806b69e63b3dc423b1832", - "sha256:cd0f7429ecfd1ff597389907045ff209c8fdb5b013d38cfa7c60728cb484b6e3", - "sha256:cd694e19c031733e446c8024dedd12a00cda87e1c10bd7b8539a87963685e969", - "sha256:cdd088c00c39a27cfa5329349cc763a48761fdc785879220d54eb785c8a38520", - "sha256:de30c1aa80f30af0f6b2058a91505ea6e36d6535d437520067f525f7df123887", - "sha256:defbbb51121189722420a208957e26e49809feafca6afeef325df66c39c4fdb3", - "sha256:f09195dda68d94a53123883de75bb97b0e35f5f6f9f3aa5bf6e496da718f0cb6", - "sha256:f12d8b11a54f32688b165fd1a788c408f927b0960984b899be7e4c190ae758f1", - "sha256:f1a317fdf5c122ad642db8a97964733ab7c3cf6009e1a8ae8821089993f175ff", - "sha256:f2781fd3cabc28278dc982a352f50c81c09a1a500cc2086dc4249853ea96b981", - "sha256:f4f456590eefb6e1b3c9ea6328c1e9fa0f1006e7481179d749b3376fc793478e" + "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f", + "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d", + "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747", + "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f", + "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d", + "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f", + "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47", + "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e", + "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba", + "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c", + "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b", + "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4", + "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7", + "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555", + "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233", + "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace", + "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805", + "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136", + "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4", + "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d", + "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806", + "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99", + "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8", + "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b", + "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5", + "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da", + "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0", + "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078", + "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f", + "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029", + "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353", + "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638", + "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9", + "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f", + "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7", + "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3", + "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e", + "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016", + "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088", + "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4", + "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882", + "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7", + "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53", + "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d", + "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080", + "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5", + "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d", + "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c", + "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8", + "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633", + "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9", + "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c" ], "index": "pypi", - "version": "==7.3.1" + "markers": "python_version >= '3.8'", + "version": "==7.5.4" }, "exceptiongroup": { "hashes": [ - "sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9", - "sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3" + "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad", + "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16" ], "markers": "python_version < '3.11'", - "version": "==1.1.3" + "version": "==1.2.1" }, "iniconfig": { "hashes": [ @@ -560,27 +476,28 @@ }, "packaging": { "hashes": [ - "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", - "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" ], - "markers": "python_version >= '3.7'", - "version": "==23.1" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "pluggy": { "hashes": [ - "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12", - "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7" + "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" ], "markers": "python_version >= '3.8'", - "version": "==1.3.0" + "version": "==1.5.0" }, "pytest": { "hashes": [ - "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002", - "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069" + "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343", + "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977" ], "index": "pypi", - "version": "==7.4.2" + "markers": "python_version >= '3.8'", + "version": "==8.2.2" }, "tomli": { "hashes": [ From 3a0bf9decc34514bf2b60bcac157603051820b76 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 15:01:31 -0700 Subject: [PATCH 24/47] Update cfeintact command --- gene_splicer/primer_finder.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index 40a15f4..adf50b7 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -534,17 +534,11 @@ def run(contigs_csv, working_path: Path = outpath / f'hivintact_{i}' working_path.mkdir(exist_ok=True) with (working_path / 'hiv-intact.log').open('w') as log_file: - subprocess.run(['proviral', - 'intact', + subprocess.run(['cfeintact', + 'check', '--working-folder', working_path, '--subtype=B', - '--run-hypermut', - '--check-long-deletion', - '--check-nonhiv', - '--check-scramble', - '--check-internal-inversion', - '--include-small-orfs', - '--output-csv', + '--ignore-distance', str(no_primers_fasta)], check=True, stdout=log_file, From 2345afa1ce17776bfa4f2ff9fbf46dc7d7b1d6ae Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 16:34:42 -0700 Subject: [PATCH 25/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 4 ++-- Singularity | 2 +- setup.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Pipfile b/Pipfile index 5ebd2cf..bfaafec 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.18.4", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.18.7", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index e378d44..3d2ec57 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6a9e94b9fb771cdbc70c29e68954211f6778ff1be83c9713a16fe72315bc5a16" + "sha256": "36616db9203b94cf93d14a00eecbce256e4080aa6e791656ffb40776ad8979e9" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "a6ea095998a894e795c89a896a5fdc83ed9bc715" + "ref": "2e8a134b01053f4a33ba676b5d4f404911321166" }, "charset-normalizer": { "hashes": [ diff --git a/Singularity b/Singularity index 513dd05..af9501c 100644 --- a/Singularity +++ b/Singularity @@ -38,7 +38,7 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null - pip3 install git+https://github.com/cfe-lab/CFEIntact@v1.18.4 + pip3 install git+https://github.com/cfe-lab/CFEIntact@v1.18.7 cd /opt/primer_finder pip3 install . diff --git a/setup.py b/setup.py index 1ebaeb7..6500d8f 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.4', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.7', 'pyyaml' ], package_data={ From bc1d1ea6aa0fa7cfc3bf61340d43ec4839a6f866 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 16:51:29 -0700 Subject: [PATCH 26/47] Use CFEIntact's python API instead of operating system's shell --- gene_splicer/primer_finder.py | 40 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index adf50b7..ad3232a 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -6,6 +6,7 @@ from operator import itemgetter import os from tarfile import TarFile +import cfeintact import pandas as pd from pathlib import Path @@ -531,19 +532,32 @@ def run(contigs_csv, archive_hivseqinr_results(working_path, hivseqinr_results_tar) if run_hivintact: - working_path: Path = outpath / f'hivintact_{i}' - working_path.mkdir(exist_ok=True) - with (working_path / 'hiv-intact.log').open('w') as log_file: - subprocess.run(['cfeintact', - 'check', - '--working-folder', working_path, - '--subtype=B', - '--ignore-distance', - str(no_primers_fasta)], - check=True, - stdout=log_file, - stderr=subprocess.STDOUT, - cwd=working_path) + working_path = outpath / f'hivintact_{i}' + log_file_path = working_path / 'hiv-intact.log' + os.makedirs(working_path, exist_ok=True) + + logger = cfeintact.logger + file_handler = logging.FileHandler(log_file_path) + logger.addHandler(file_handler) + + cfeintact.check( + working_dir=working_path, + input_file=str(no_primers_fasta), + subtype="B", + check_packaging_signal=True, + check_rre=True, + check_major_splice_donor_site=True, + check_hypermut=True, + check_long_deletion=True, + check_nonhiv=True, + check_scramble=True, + check_internal_inversion=True, + check_unknown_nucleotides=True, + check_small_orfs=True, + check_distance=False, + output_csv=True, + ) + if hivintact_results_tar is not None: archive_hivintact_results(working_path, hivintact_results_tar) From 514221c930f61453ae6c3dd0c620beb928be3813 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 8 Jul 2024 16:52:25 -0700 Subject: [PATCH 27/47] Do not install cfeintact twice --- Singularity | 2 -- 1 file changed, 2 deletions(-) diff --git a/Singularity b/Singularity index af9501c..4a91529 100644 --- a/Singularity +++ b/Singularity @@ -38,8 +38,6 @@ From: ubuntu:22.04 apt-get install -y ncbi-blast+ echo ===== Installing Python packages ===== >/dev/null - pip3 install git+https://github.com/cfe-lab/CFEIntact@v1.18.7 - cd /opt/primer_finder pip3 install . From 18199519b097475fc7464d8ab95b9212b0bc3996 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 10:56:38 -0700 Subject: [PATCH 28/47] Fix column name of CFEIntact errors.csv --- gene_splicer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index a658b91..875fe95 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -428,7 +428,7 @@ def get_verdict(SEQID, all_errors): with open(os.path.join(d, 'errors.csv'), 'r') as f: reader = csv.DictReader(f) - grouped = groupby(reader, key=itemgetter('sequence_name')) + grouped = groupby(reader, key=itemgetter('qseqid')) for sequence_name, errors in grouped: if sequence_name not in intact: all_errors = [error['error'] for error in errors] From 05aac4f7fd48d6bf2f586ff46894be4aa483c0dc Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 11:48:52 -0700 Subject: [PATCH 29/47] Improve reading of .SAM files The data section does not always begin at line 3. This improvement makes the code able to handle those situations where it does not. --- gene_splicer/utils.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 875fe95..effd718 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -334,7 +334,17 @@ def splice_aligned_genes(query, target, samfile, annotation): def load_samfile(samfile_path): - result = pd.read_table(samfile_path, skiprows=2, header=None) + # Open the SAM file and find the starting point for data + with open(samfile_path, 'r') as file: + # Skip meta fields + lines = file.readlines() + data_start_index = 0 + for i, line in enumerate(lines): + if not line.startswith('@'): + data_start_index = i + break + + result = pd.read_table(samfile_path, skiprows=data_start_index, header=None) result['cigar'] = result.apply(split_cigar, axis=1) return result From 49022e0da2bdb3921d5e11fe36a801b9863cb70b Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 14:02:50 -0700 Subject: [PATCH 30/47] Small refactoring of utils.py --- gene_splicer/utils.py | 76 ++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index effd718..d00c09c 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -3,6 +3,7 @@ import os import re import typing +from typing import TextIO, Mapping, Dict, Set, List, Iterable, Tuple import yaml import json @@ -423,45 +424,62 @@ def align(target_seq, 'FrameshiftInOrf', ] -def iterate_hivintact_data(name, outpath): +def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> Iterable[Tuple[str, str]]: intact = set() - def get_verdict(SEQID, all_errors): + def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) verdict = ordered[0] - return [SEQID, verdict] + return (SEQID, verdict) - for d in outpath.glob('hivintact*'): - for (SEQID, sequence) in read_fasta(os.path.join(d, 'intact.fasta')): - yield [SEQID, 'Intact'] - intact.add(SEQID) + for (SEQID, sequence) in read_fasta(os.path.join(directory, 'intact.fasta')): + yield (SEQID, 'Intact') + intact.add(SEQID) - with open(os.path.join(d, 'errors.csv'), 'r') as f: - reader = csv.DictReader(f) - grouped = groupby(reader, key=itemgetter('qseqid')) - for sequence_name, errors in grouped: - if sequence_name not in intact: - all_errors = [error['error'] for error in errors] - yield get_verdict(sequence_name, all_errors) + with open(os.path.join(directory, 'errors.csv'), 'r') as f: + reader = csv.DictReader(f) + grouped = groupby(reader, key=itemgetter('qseqid')) + for sequence_name, errors in grouped: + if sequence_name not in intact: + all_errors = [error['error'] for error in errors] + yield get_verdict(sequence_name, all_errors) -def get_hivintact_data(name, outpath): +def iterate_hivintact_verdicts(outpath: Path) -> Iterable[Tuple[str, str]]: + intact: Set[str] = set() + + for directory in outpath.glob('hivintact*'): + yield from iterate_hivintact_verdicts_1(directory, intact) + + +def get_hivintact_verdicts(name, outpath): column_names = ['SEQID', 'MyVerdict'] - data = iterate_hivintact_data(name, outpath) + data = iterate_hivintact_verdicts(outpath) return pd.DataFrame(data, columns=column_names) -def get_hivseqinr_data(name, outpath): - seqinr_paths = glob.glob( - str(outpath / 'hivseqinr*' / 'Results_Final' / - 'Output_MyBigSummary_DF_FINAL.csv')) - parts = [] + +def iterate_hivseqinr_verdicts_1(directory: Path) -> Iterable[Tuple[str, str]]: + path = directory / 'Output_MyBigSummary_DF_FINAL.csv' + if not path.is_file(): + return + + with path.open() as fd: + reader = csv.DictReader(fd) + for row in reader: + yield (row["SEQID"], row["MyVerdict"]) + + +def iterate_hivseqinr_verdicts(outpath: Path) -> Iterable[Tuple[str, str]]: + seqinr_paths = outpath.glob('hivseqinr*/Results_Final/Output_MyBigSummary_DF_FINAL.csv') for path in seqinr_paths: - if not os.path.isfile(path): - continue - part = pd.read_csv(path) - parts.append(part) - # seqinr = pd.read_csv(seqinr_path) - return pd.concat(parts) + yield from iterate_hivseqinr_verdicts_1(path) + + +def get_hivseqinr_verdicts(name, outpath): + column_names = ['SEQID', 'MyVerdict'] + data = iterate_hivseqinr_verdicts(outpath) + return pd.DataFrame(data, columns=column_names) + def generate_table_precursor(name, outpath, add_columns=None): # Output csv @@ -473,9 +491,9 @@ def generate_table_precursor(name, outpath, add_columns=None): # Load hivseqinr data or HIVIntact results if any(outpath.glob('hivintact*')): - results = get_hivintact_data(name, outpath) + results = get_hivintact_verdicts(name, outpath) elif any(outpath.glob('hivseqinr*')): - results = get_hivseqinr_data(name, outpath) + results = get_hivseqinr_verdicts(name, outpath) else: raise RuntimeError("Neither HIVIntact nor HIVSeqinR directory exists.") From 3d53ae8fdd673b2a921fb58d32edc6905c62d789 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 14:04:02 -0700 Subject: [PATCH 31/47] Add a separate proviral_landscapes.py script --- gene_splicer/landscapes.py | 205 +++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 gene_splicer/landscapes.py diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py new file mode 100644 index 0000000..a5f7dc7 --- /dev/null +++ b/gene_splicer/landscapes.py @@ -0,0 +1,205 @@ +import csv +import logging +import os +import re +import typing +from typing import TextIO, Mapping, Dict, Set, List, Iterable, Tuple +import argparse +import sys + +import json +import shutil +import subprocess as sp +import glob +from pathlib import Path +from csv import DictWriter, DictReader +from itertools import groupby +from operator import itemgetter + +from gene_splicer.utils import ( + iterate_hivintact_verdicts_1, + iterate_hivseqinr_verdicts_1, + LEFT_PRIMER_END, RIGHT_PRIMER_START, +) + + +logger = logging.getLogger(__name__) + + +def generate_proviral_landscape_csv_1_cont(blastn_reader: csv.DictReader, + landscape_writer: csv.DictWriter, + verdicts: Mapping[str, str], + ) -> None: + + for row in blastn_reader: + if row['qseqid'] in ['8E5LAV', 'HXB2']: + # skip the positive control rows + continue + + ref_start = int(row['sstart']) + ref_end = int(row['send']) + if ref_end <= LEFT_PRIMER_END or ref_start >= RIGHT_PRIMER_START: + # skip unspecific matches of LTR at start and end + continue + + qseqid = row['qseqid'] + try: + [run_name, sample_name, _, _] = qseqid.split('::') + except ValueError: + [run_name, sample_name] = [None, qseqid] + + is_inverted = '' + if ref_end < ref_start: + # automatically recognize inverted regions + new_end = ref_start + ref_start = ref_end + ref_end = new_end + is_inverted = 'yes' + + verdict = verdicts.get(sample_name) + landscape_entry = {'ref_start': ref_start, + 'ref_end': ref_end, + 'samp_name': sample_name, + 'run_name': run_name, + 'is_inverted': is_inverted, + 'is_defective': verdict is not None, + 'defect': verdict, + } + + landscape_writer.writerow(landscape_entry) + + +def get_hivintact_verdicts_1_map(details_dir: Path) -> Mapping[str, str]: + ret: Dict[str, str] = {} + + for [qseqid, verdict] in iterate_hivintact_verdicts_1(details_dir): + ret[qseqid] = verdict + + return ret + + +def get_hivseqinr_verdicts_1_map(details_dir: Path) -> Mapping[str, str]: + ret: Dict[str, str] = {} + + for [qseqid, verdict] in iterate_hivseqinr_verdicts_1(details_dir): + ret[qseqid] = verdict + + return ret + + +def generate_proviral_landscape_csv_1(landscape_writer: csv.DictWriter, + details_dir: Path, + ) -> None: + is_hivintact = (details_dir / "holistic.csv").exists() + if is_hivintact: + verdicts = get_hivintact_verdicts_1_map(details_dir) + blastn_path = details_dir / "blast.csv" + else: + verdicts = get_hivseqinr_verdicts_1_map(details_dir) + blastn_path = details_dir / "Results_Intermediate" / "Output_Blastn_HXB2MEGA28_tabdelim.txt" + + with blastn_path.open() as blastn_file: + if is_hivintact: + blastn_reader = DictReader(blastn_file) + else: + blastn_columns = ['qseqid', + 'qlen', + 'sseqid', + 'sgi', + 'slen', + 'qstart', + 'qend', + 'sstart', + 'send', + 'evalue', + 'bitscore', + 'length', + 'pident', + 'nident', + 'btop', + 'stitle', + 'sstrand'] + blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') + + return generate_proviral_landscape_csv_1_cont( + blastn_reader, + landscape_writer, + verdicts, + ) + + + +class UserError(RuntimeError): + def __init__(self, fmt: str, *fmt_args: object): + self.fmt = fmt + self.fmt_args = fmt_args + self.code = 1 + + +def dir_path(string: str) -> Path: + if os.path.exists(string) and os.path.isdir(string): + return Path(string) + else: + raise UserError("Path %r does not exist or is not a directory.", string) + + +def cli_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Generate Proviral Landscape CSV.") + + parser.add_argument("--details_dir", type=dir_path, required=True, + help="Directory containing details files for verdicts.") + + parser.add_argument("--output", type=argparse.FileType("wt"), required=True, + help="Output CSV file for proviral landscape.") + + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--verbose', action='store_true', + help='Increase output verbosity.') + verbosity_group.add_argument('--no-verbose', action='store_true', + help='Normal output verbosity.', default=True) + verbosity_group.add_argument('--debug', action='store_true', + help='Maximum output verbosity.') + verbosity_group.add_argument('--quiet', action='store_true', + help='Minimize output verbosity.') + + return parser + + +def main(argv: list) -> int: + parser = cli_parser() + args = parser.parse_args(argv) + if args.quiet: + logger.setLevel(logging.ERROR) + elif args.verbose: + logger.setLevel(logging.INFO) + elif args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.WARN) + + logger.debug("Start.") + + fieldnames = ['ref_start', 'ref_end', 'samp_name', 'run_name', 'is_inverted', 'is_defective', 'defect'] + + landscape_writer = csv.DictWriter(args.output, fieldnames=fieldnames) + landscape_writer.writeheader() + generate_proviral_landscape_csv_1(landscape_writer, args.details_dir) + + logger.debug("Done.") + return 0 + + +if __name__ == '__main__': + try: + rc = main(sys.argv[1:]) + except BrokenPipeError: + logger.debug("Broken pipe.") + rc = 0 + except KeyboardInterrupt: + logger.debug("Interrupted.") + rc = 1 + except UserError as e: + logger.fatal(e.fmt, *e.fmt_args) + rc = e.code + + sys.exit(rc) From c1698b4ce83cd34a196165415d6d0a37961d7bbb Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 16:00:18 -0700 Subject: [PATCH 32/47] Fix defect column value for intact sequences --- gene_splicer/landscapes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py index a5f7dc7..62b4456 100644 --- a/gene_splicer/landscapes.py +++ b/gene_splicer/landscapes.py @@ -63,7 +63,7 @@ def generate_proviral_landscape_csv_1_cont(blastn_reader: csv.DictReader, 'run_name': run_name, 'is_inverted': is_inverted, 'is_defective': verdict is not None, - 'defect': verdict, + 'defect': verdict or 'Intact', } landscape_writer.writerow(landscape_entry) From 0925e737f05710155f180bb333b8edd6ba46278b Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 9 Jul 2024 16:47:41 -0700 Subject: [PATCH 33/47] Move all landscapes stuff to landscapes.py --- gene_splicer/landscapes.py | 16 ++++++++ gene_splicer/sample.py | 3 +- gene_splicer/utils.py | 79 -------------------------------------- 3 files changed, 18 insertions(+), 80 deletions(-) diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py index 62b4456..2fa6c3e 100644 --- a/gene_splicer/landscapes.py +++ b/gene_splicer/landscapes.py @@ -128,6 +128,22 @@ def generate_proviral_landscape_csv_1(landscape_writer: csv.DictWriter, ) +def generate_proviral_landscape_csv(outpath: Path, is_hivintact: bool): + proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv') + + if is_hivintact: + subpath = 'hivintact*' + else: + subpath = 'hivseqinr*' + + landscape_columns = ['samp_name', 'run_name', 'ref_start', 'ref_end', 'defect', 'is_inverted', 'is_defective'] + with open(proviral_landscape_csv, 'w') as landscape_file: + landscape_writer = csv.DictWriter(landscape_file, fieldnames=landscape_columns) + landscape_writer.writeheader() + + for details_dir in outpath.glob(subpath): + generate_proviral_landscape_csv_1(landscape_writer, details_dir) + class UserError(RuntimeError): def __init__(self, fmt: str, *fmt_args: object): diff --git a/gene_splicer/sample.py b/gene_splicer/sample.py index 076ce3c..d00e7fa 100644 --- a/gene_splicer/sample.py +++ b/gene_splicer/sample.py @@ -7,6 +7,7 @@ import gene_splicer.gene_splicer as gene_splicer import gene_splicer.primer_finder as primer_finder import gene_splicer.utils as utils +import gene_splicer.landscapes as landscapes def parse_args(): @@ -115,7 +116,7 @@ def main(): for file in fasta_files: gene_splicer.run(file, outdir=outpath) utils.generate_table_precursor(name=run_name, outpath=outpath) - utils.generate_proviral_landscape_csv(outpath, is_hivintact=args.hivintact) + landscapes.generate_proviral_landscape_csv(outpath, is_hivintact=args.hivintact) copy_output(outpath / 'outcome_summary.csv', args.outcome_summary_csv) copy_output(outpath / (run_name + '_conseqs_primer_analysis.csv'), args.conseqs_primers_csv) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index d00c09c..44ff8cd 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -584,85 +584,6 @@ def generate_table_precursor_2(hivseqinr_resultsfile, filtered_file, return table_precursorfile -def generate_proviral_landscape_csv(outpath, is_hivintact): - proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv') - landscape_rows = [] - - table_precursor_csv = os.path.join(outpath, 'table_precursor.csv') - - if is_hivintact: - subpath = os.path.join(outpath, 'hivintact*', 'blast.csv') - else: - subpath = os.path.join(outpath, 'hivseqinr*', 'Results_Intermediate', 'Output_Blastn_HXB2MEGA28_tabdelim.txt') - - blastn_csvs = glob.glob(subpath) - blastn_csv = blastn_csvs[0] - - with open(blastn_csv, 'r') as blastn_file: - if is_hivintact: - blastn_reader = DictReader(blastn_file) - else: - blastn_columns = ['qseqid', - 'qlen', - 'sseqid', - 'sgi', - 'slen', - 'qstart', - 'qend', - 'sstart', - 'send', - 'evalue', - 'bitscore', - 'length', - 'pident', - 'nident', - 'btop', - 'stitle', - 'sstrand'] - blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') - - for row in blastn_reader: - if row['qseqid'] in ['8E5LAV', 'HXB2']: - # skip the positive control rows - continue - ref_start = int(row['sstart']) - ref_end = int(row['send']) - if ref_end <= LEFT_PRIMER_END or ref_start >= RIGHT_PRIMER_START: - # skip unspecific matches of LTR at start and end - continue - [run_name, sample_name, _, _] = row['qseqid'].split('::') - is_inverted = '' - if ref_end < ref_start: - # automatically recognize inverted regions - new_end = ref_start - ref_start = ref_end - ref_end = new_end - is_inverted = 'yes' - landscape_entry = {'ref_start': ref_start, - 'ref_end': ref_end, - 'samp_name': sample_name, - 'run_name': run_name, - 'is_inverted': is_inverted, - 'is_defective': ''} - # is_defective is empty for now, will be filled manually - landscape_rows.append(landscape_entry) - - with open(table_precursor_csv, 'r') as tab_prec: - tab_prec_reader = DictReader(tab_prec) - for row in tab_prec_reader: - samp_name = row['sample'] - verdict = row['MyVerdict'] - for entry in landscape_rows: - if entry['samp_name'] == samp_name: - entry['defect'] = verdict - - landscape_columns = ['samp_name', 'run_name', 'ref_start', 'ref_end', 'defect', 'is_inverted', 'is_defective'] - with open(proviral_landscape_csv, 'w') as landscape_file: - landscape_writer = csv.DictWriter(landscape_file, fieldnames=landscape_columns) - landscape_writer.writeheader() - landscape_writer.writerows(landscape_rows) - - def get_softclipped_region(query, alignment, alignment_path): try: size, op = alignment.iloc[0]['cigar'][0] From c1ed309dcad6a61b2dee0a1da99e0439fda97b62 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 12 Jul 2024 14:48:07 -0700 Subject: [PATCH 34/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 116 +++++++++++++++++++++++++-------------------------- setup.py | 2 +- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/Pipfile b/Pipfile index bfaafec..568ffdc 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.18.7", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.18.8", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 3d2ec57..a5ed682 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "36616db9203b94cf93d14a00eecbce256e4080aa6e791656ffb40776ad8979e9" + "sha256": "9c092e800c5352454e25bd0df1a17c2ff2722892d620bb4869c7da671a1df741" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "2e8a134b01053f4a33ba676b5d4f404911321166" + "ref": "7c67d4ba867ccb3d5469bb34e39d3a4a9c2b293d" }, "charset-normalizer": { "hashes": [ @@ -367,11 +367,11 @@ }, "setuptools": { "hashes": [ - "sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05", - "sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1" + "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5", + "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc" ], "markers": "python_version >= '3.8'", - "version": "==70.2.0" + "version": "==70.3.0" }, "six": { "hashes": [ @@ -401,62 +401,62 @@ "develop": { "coverage": { "hashes": [ - "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f", - "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d", - "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747", - "sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f", - "sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d", - "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f", - "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47", - "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e", - "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba", - "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c", - "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b", - "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4", - "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7", - "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555", - "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233", - "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace", - "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805", - "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136", - "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4", - "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d", - "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806", - "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99", - "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8", - "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b", - "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5", - "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da", - "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0", - "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078", - "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f", - "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029", - "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353", - "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638", - "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9", - "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f", - "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7", - "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3", - "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e", - "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016", - "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088", - "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4", - "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882", - "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7", - "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53", - "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d", - "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080", - "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5", - "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d", - "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c", - "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8", - "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633", - "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9", - "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c" + "sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382", + "sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1", + "sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac", + "sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee", + "sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166", + "sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57", + "sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c", + "sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b", + "sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51", + "sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da", + "sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450", + "sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2", + "sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd", + "sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d", + "sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d", + "sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6", + "sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca", + "sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169", + "sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1", + "sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713", + "sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b", + "sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6", + "sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c", + "sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605", + "sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463", + "sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b", + "sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6", + "sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5", + "sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63", + "sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c", + "sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783", + "sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44", + "sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca", + "sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8", + "sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d", + "sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390", + "sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933", + "sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67", + "sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b", + "sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03", + "sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b", + "sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791", + "sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb", + "sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807", + "sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6", + "sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2", + "sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428", + "sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd", + "sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c", + "sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94", + "sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8", + "sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==7.5.4" + "version": "==7.6.0" }, "exceptiongroup": { "hashes": [ diff --git a/setup.py b/setup.py index 6500d8f..2abf644 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.7', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.8', 'pyyaml' ], package_data={ From eea015180947ab6ea5b626776c1ac22ef212503b Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 16 Jul 2024 13:49:53 -0700 Subject: [PATCH 35/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 10 +++++----- setup.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Pipfile b/Pipfile index 568ffdc..d5fedd9 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.18.8", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.18.9", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index a5ed682..d4f807c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9c092e800c5352454e25bd0df1a17c2ff2722892d620bb4869c7da671a1df741" + "sha256": "ac639c5687670e64465782b1d6c4d508b3ba6c84f1b22f1225c3c3efffdbac4c" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "7c67d4ba867ccb3d5469bb34e39d3a4a9c2b293d" + "ref": "513b7ce5b332df7601f3a2ac723506ea5ec755a6" }, "charset-normalizer": { "hashes": [ @@ -460,11 +460,11 @@ }, "exceptiongroup": { "hashes": [ - "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad", - "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16" + "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", + "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc" ], "markers": "python_version < '3.11'", - "version": "==1.2.1" + "version": "==1.2.2" }, "iniconfig": { "hashes": [ diff --git a/setup.py b/setup.py index 2abf644..57d3048 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.8', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.9', 'pyyaml' ], package_data={ From 6d18b36e6af442fb41c2648f8d123abb6613d564 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 26 Jul 2024 14:28:15 -0700 Subject: [PATCH 36/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 16 ++++++++-------- setup.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Pipfile b/Pipfile index d5fedd9..fc2f73c 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.18.9", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.18.10", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index d4f807c..0094cbe 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ac639c5687670e64465782b1d6c4d508b3ba6c84f1b22f1225c3c3efffdbac4c" + "sha256": "242d7f1c41b5fee2859daea64f75c76bbb03977c462f29e9920e9ca16f4dff82" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "513b7ce5b332df7601f3a2ac723506ea5ec755a6" + "ref": "6e1a09b878ca1e8ac851b4e7ddcf58e813f69628" }, "charset-normalizer": { "hashes": [ @@ -367,11 +367,11 @@ }, "setuptools": { "hashes": [ - "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5", - "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc" + "sha256:032d42ee9fb536e33087fb66cac5f840eb9391ed05637b3f2a76a7c8fb477936", + "sha256:33874fdc59b3188304b2e7c80d9029097ea31627180896fb549c578ceb8a0855" ], "markers": "python_version >= '3.8'", - "version": "==70.3.0" + "version": "==71.1.0" }, "six": { "hashes": [ @@ -492,12 +492,12 @@ }, "pytest": { "hashes": [ - "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343", - "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977" + "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", + "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.2.2" + "version": "==8.3.2" }, "tomli": { "hashes": [ diff --git a/setup.py b/setup.py index 57d3048..abbd699 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.9', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.10', 'pyyaml' ], package_data={ From 2b9a7b657423800e793e09f26d92b8349bd38a13 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Sat, 27 Jul 2024 10:36:32 -0700 Subject: [PATCH 37/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Pipfile b/Pipfile index fc2f73c..9157cca 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.18.10", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.19.0", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 0094cbe..301cc74 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "242d7f1c41b5fee2859daea64f75c76bbb03977c462f29e9920e9ca16f4dff82" + "sha256": "9423d0d698fd4b562f454550c5ddc78abb8d2b7111295abbe3a8d2c35d4922b6" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "6e1a09b878ca1e8ac851b4e7ddcf58e813f69628" + "ref": "ec42d6735f519ee8051175ff999e467825072040" }, "charset-normalizer": { "hashes": [ diff --git a/setup.py b/setup.py index abbd699..16da194 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.18.10', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.19.0', 'pyyaml' ], package_data={ From e68bfe57e3fbe19dbe1dd51914739b5db578df43 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Sat, 27 Jul 2024 13:00:05 -0700 Subject: [PATCH 38/47] Update CFEIntact errors table --- gene_splicer/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 44ff8cd..4370a7f 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -422,6 +422,8 @@ def align(target_seq, 'InsertionInOrf', 'InternalStopInOrf', 'FrameshiftInOrf', + 'MutatedStopCodon', + 'MutatedStartCodon', ] def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> Iterable[Tuple[str, str]]: From 75fcf9c86d0f58858dd5aaa4772947c3d65c5bc2 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Sat, 27 Jul 2024 23:28:02 -0700 Subject: [PATCH 39/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Pipfile b/Pipfile index 9157cca..c192f84 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.19.0", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.20.0", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 301cc74..ed45be5 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9423d0d698fd4b562f454550c5ddc78abb8d2b7111295abbe3a8d2c35d4922b6" + "sha256": "95e509a36a7c664fbc0a32dc26e768b6549de116d55171959a711fd3efc00091" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "ec42d6735f519ee8051175ff999e467825072040" + "ref": "b1a6156c70ba76d5f911bb72b0f425b6d4358440" }, "charset-normalizer": { "hashes": [ diff --git a/setup.py b/setup.py index 16da194..d0dfcba 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.19.0', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.20.0', 'pyyaml' ], package_data={ From 731556b244ede9dda7cf1efdc6d41aa60dbfc6af Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Sun, 28 Jul 2024 00:34:14 -0700 Subject: [PATCH 40/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 4 ++-- gene_splicer/utils.py | 22 +++++++++++----------- setup.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Pipfile b/Pipfile index c192f84..aecfbf0 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.20.0", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.22.0", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index ed45be5..e59b2c8 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "95e509a36a7c664fbc0a32dc26e768b6549de116d55171959a711fd3efc00091" + "sha256": "56b5477cba506a2df408fcad36d4fc357ecbfd0bedccf9d90ecb7b8e615cb43e" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "b1a6156c70ba76d5f911bb72b0f425b6d4358440" + "ref": "a8387e08d08ab8db2ed5dc70cbe765d74a353e12" }, "charset-normalizer": { "hashes": [ diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 4370a7f..957c5c7 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -405,23 +405,21 @@ def align(target_seq, return alignment_path HIVINTACT_ERRORS_TABLE = [ - 'AlignmentFailed', - 'InvalidCodon', 'NonHIV', 'LongDeletion', 'InternalInversion', 'Scramble', - 'APOBECHypermutationDetected', + 'APOBECHypermutation', 'MajorSpliceDonorSiteMutated', 'PackagingSignalDeletion', 'PackagingSignalNotComplete', 'RevResponseElementDeletion', 'MisplacedORF', 'WrongORFNumber', - 'DeletionInOrf', - 'InsertionInOrf', - 'InternalStopInOrf', - 'FrameshiftInOrf', + 'Deletion', + 'Insertion', + 'InternalStop', + 'Frameshift', 'MutatedStopCodon', 'MutatedStartCodon', ] @@ -434,11 +432,13 @@ def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: verdict = ordered[0] return (SEQID, verdict) - for (SEQID, sequence) in read_fasta(os.path.join(directory, 'intact.fasta')): - yield (SEQID, 'Intact') - intact.add(SEQID) + with open(os.path.join(directory, 'holistic.csv'), 'r') as f: + reader = csv.DictReader(f) + for row in reader: + if row["intact"] == "True": + intact.add(row["qseqid"]) - with open(os.path.join(directory, 'errors.csv'), 'r') as f: + with open(os.path.join(directory, 'defects.csv'), 'r') as f: reader = csv.DictReader(f) grouped = groupby(reader, key=itemgetter('qseqid')) for sequence_name, errors in grouped: diff --git a/setup.py b/setup.py index d0dfcba..61530de 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.20.0', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.22.0', 'pyyaml' ], package_data={ From 9454adea9ed4f80a3f793374af39463c50511689 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 10:31:34 -0700 Subject: [PATCH 41/47] Fix landscape generation based on sample_name --- gene_splicer/landscapes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py index 2fa6c3e..60fe908 100644 --- a/gene_splicer/landscapes.py +++ b/gene_splicer/landscapes.py @@ -56,7 +56,7 @@ def generate_proviral_landscape_csv_1_cont(blastn_reader: csv.DictReader, ref_end = new_end is_inverted = 'yes' - verdict = verdicts.get(sample_name) + verdict = verdicts.get(qseqid) landscape_entry = {'ref_start': ref_start, 'ref_end': ref_end, 'samp_name': sample_name, From 87aaffb4379c2e2d2e1eea2ac3d4101c0d7f815c Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 14:08:04 -0700 Subject: [PATCH 42/47] Fix ignoring intact sequences in table_precursor and landscapes_plots --- gene_splicer/landscapes.py | 7 ++++--- gene_splicer/utils.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py index 60fe908..9da65e7 100644 --- a/gene_splicer/landscapes.py +++ b/gene_splicer/landscapes.py @@ -56,14 +56,15 @@ def generate_proviral_landscape_csv_1_cont(blastn_reader: csv.DictReader, ref_end = new_end is_inverted = 'yes' - verdict = verdicts.get(qseqid) + verdict = verdicts[qseqid] + is_defective = verdict != 'Intact' landscape_entry = {'ref_start': ref_start, 'ref_end': ref_end, 'samp_name': sample_name, 'run_name': run_name, 'is_inverted': is_inverted, - 'is_defective': verdict is not None, - 'defect': verdict or 'Intact', + 'is_defective': is_defective, + 'defect': verdict, } landscape_writer.writerow(landscape_entry) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 957c5c7..4669b90 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -428,8 +428,12 @@ def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> I intact = set() def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: - ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) - verdict = ordered[0] + if all_errors: + ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) + verdict = ordered[0] + else: + verdict = "Intact" + return (SEQID, verdict) with open(os.path.join(directory, 'holistic.csv'), 'r') as f: @@ -437,6 +441,8 @@ def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: for row in reader: if row["intact"] == "True": intact.add(row["qseqid"]) + SEQID = row["qseqid"] + yield get_verdict(SEQID, all_errors=[]) with open(os.path.join(directory, 'defects.csv'), 'r') as f: reader = csv.DictReader(f) From 5fc93403ac59a6fbb5ca0619618bb55d8240516a Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 16:34:40 -0700 Subject: [PATCH 43/47] Bump CFEIntact version --- Pipfile | 2 +- Pipfile.lock | 10 +++++----- gene_splicer/utils.py | 14 +++++++------- setup.py | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Pipfile b/Pipfile index aecfbf0..bad630b 100644 --- a/Pipfile +++ b/Pipfile @@ -10,7 +10,7 @@ python-levenshtein = "==0.12.0" pandas = "==2.0.2" requests = "==2.31.0" pyyaml = "*" -cfeintact = {ref = "v1.22.0", git = "https://github.com/cfe-lab/CFEIntact"} +cfeintact = {ref = "v1.23.0", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index e59b2c8..a00bf95 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "56b5477cba506a2df408fcad36d4fc357ecbfd0bedccf9d90ecb7b8e615cb43e" + "sha256": "e765cc4af24f9eca7db1eb84af000daab8d3238c01daa3df4f57ddb03d7b8774" }, "pipfile-spec": 6, "requires": { @@ -61,7 +61,7 @@ }, "cfeintact": { "git": "https://github.com/cfe-lab/CFEIntact", - "ref": "a8387e08d08ab8db2ed5dc70cbe765d74a353e12" + "ref": "70031d6aff59249eb61495d2c63ae467bef6c408" }, "charset-normalizer": { "hashes": [ @@ -367,11 +367,11 @@ }, "setuptools": { "hashes": [ - "sha256:032d42ee9fb536e33087fb66cac5f840eb9391ed05637b3f2a76a7c8fb477936", - "sha256:33874fdc59b3188304b2e7c80d9029097ea31627180896fb549c578ceb8a0855" + "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", + "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" ], "markers": "python_version >= '3.8'", - "version": "==71.1.0" + "version": "==72.1.0" }, "six": { "hashes": [ diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 4669b90..e2c49f9 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -427,9 +427,9 @@ def align(target_seq, def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> Iterable[Tuple[str, str]]: intact = set() - def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: - if all_errors: - ordered = sorted(all_errors, key=HIVINTACT_ERRORS_TABLE.index) + def get_verdict(SEQID: str, all_defects) -> Tuple[str, str]: + if all_defects: + ordered = sorted(all_defects, key=HIVINTACT_ERRORS_TABLE.index) verdict = ordered[0] else: verdict = "Intact" @@ -442,15 +442,15 @@ def get_verdict(SEQID: str, all_errors) -> Tuple[str, str]: if row["intact"] == "True": intact.add(row["qseqid"]) SEQID = row["qseqid"] - yield get_verdict(SEQID, all_errors=[]) + yield get_verdict(SEQID, all_defects=[]) with open(os.path.join(directory, 'defects.csv'), 'r') as f: reader = csv.DictReader(f) grouped = groupby(reader, key=itemgetter('qseqid')) - for sequence_name, errors in grouped: + for sequence_name, defects in grouped: if sequence_name not in intact: - all_errors = [error['error'] for error in errors] - yield get_verdict(sequence_name, all_errors) + all_defects = [defect['code'] for defect in defects] + yield get_verdict(sequence_name, all_defects) def iterate_hivintact_verdicts(outpath: Path) -> Iterable[Tuple[str, str]]: diff --git a/setup.py b/setup.py index 61530de..e00e2ec 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ 'python-Levenshtein==0.12.0', 'pandas==2.0.2', 'requests==2.31.0', - 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.22.0', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.23.0', 'pyyaml' ], package_data={ From 28950eb74463e0753325dbc6437ec8a58c3ff73b Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 17:03:10 -0700 Subject: [PATCH 44/47] Bump pandas version --- Pipfile | 2 +- Pipfile.lock | 60 ++++++++++++++++++++++++++++------------------------ setup.py | 2 +- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/Pipfile b/Pipfile index bad630b..d46c6eb 100644 --- a/Pipfile +++ b/Pipfile @@ -7,7 +7,7 @@ name = "pypi" gotoh = {subdirectory = "micall/alignment", ref = "v7.7.0", git = "https://github.com/cfe-lab/MiCall.git"} numpy = "==1.25.1" python-levenshtein = "==0.12.0" -pandas = "==2.0.2" +pandas = "==2.2.2" requests = "==2.31.0" pyyaml = "*" cfeintact = {ref = "v1.23.0", git = "https://github.com/cfe-lab/CFEIntact"} diff --git a/Pipfile.lock b/Pipfile.lock index a00bf95..bcf2914 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "e765cc4af24f9eca7db1eb84af000daab8d3238c01daa3df4f57ddb03d7b8774" + "sha256": "627846a347fac9a0d82e30a61f395bc23a45f30344dcf8144eeb2abebc84d9d0" }, "pipfile-spec": 6, "requires": { @@ -215,35 +215,39 @@ }, "pandas": { "hashes": [ - "sha256:02755de164da6827764ceb3bbc5f64b35cb12394b1024fdf88704d0fa06e0e2f", - "sha256:0a1e0576611641acde15c2322228d138258f236d14b749ad9af498ab69089e2d", - "sha256:1eb09a242184092f424b2edd06eb2b99d06dc07eeddff9929e8667d4ed44e181", - "sha256:30a89d0fec4263ccbf96f68592fd668939481854d2ff9da709d32a047689393b", - "sha256:50e451932b3011b61d2961b4185382c92cc8c6ee4658dcd4f320687bb2d000ee", - "sha256:51a93d422fbb1bd04b67639ba4b5368dffc26923f3ea32a275d2cc450f1d1c86", - "sha256:598e9020d85a8cdbaa1815eb325a91cfff2bb2b23c1442549b8a3668e36f0f77", - "sha256:66d00300f188fa5de73f92d5725ced162488f6dc6ad4cecfe4144ca29debe3b8", - "sha256:69167693cb8f9b3fc060956a5d0a0a8dbfed5f980d9fd2c306fb5b9c855c814c", - "sha256:6d6d10c2142d11d40d6e6c0a190b1f89f525bcf85564707e31b0a39e3b398e08", - "sha256:713f2f70abcdade1ddd68fc91577cb090b3544b07ceba78a12f799355a13ee44", - "sha256:7376e13d28eb16752c398ca1d36ccfe52bf7e887067af9a0474de6331dd948d2", - "sha256:77550c8909ebc23e56a89f91b40ad01b50c42cfbfab49b3393694a50549295ea", - "sha256:7b21cb72958fc49ad757685db1919021d99650d7aaba676576c9e88d3889d456", - "sha256:9ebb9f1c22ddb828e7fd017ea265a59d80461d5a79154b49a4207bd17514d122", - "sha256:a18e5c72b989ff0f7197707ceddc99828320d0ca22ab50dd1b9e37db45b010c0", - "sha256:a6b5f14cd24a2ed06e14255ff40fe2ea0cfaef79a8dd68069b7ace74bd6acbba", - "sha256:b42b120458636a981077cfcfa8568c031b3e8709701315e2bfa866324a83efa8", - "sha256:c4af689352c4fe3d75b2834933ee9d0ccdbf5d7a8a7264f0ce9524e877820c08", - "sha256:c7319b6e68de14e6209460f72a8d1ef13c09fb3d3ef6c37c1e65b35d50b5c145", - "sha256:cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5", - "sha256:dd46bde7309088481b1cf9c58e3f0e204b9ff9e3244f441accd220dd3365ce7c", - "sha256:dd5476b6c3fe410ee95926873f377b856dbc4e81a9c605a0dc05aaccc6a7c6c6", - "sha256:e69140bc2d29a8556f55445c15f5794490852af3de0f609a24003ef174528b79", - "sha256:f908a77cbeef9bbd646bd4b81214cbef9ac3dda4181d5092a4aa9797d1bc7774" + "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863", + "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2", + "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1", + "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad", + "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db", + "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76", + "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51", + "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32", + "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08", + "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b", + "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4", + "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921", + "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288", + "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee", + "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0", + "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24", + "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99", + "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151", + "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd", + "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce", + "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57", + "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef", + "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54", + "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a", + "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238", + "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23", + "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772", + "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce", + "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==2.0.2" + "markers": "python_version >= '3.9'", + "version": "==2.2.2" }, "python-dateutil": { "hashes": [ diff --git a/setup.py b/setup.py index e00e2ec..813dd68 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ 'gotoh @ git+https://github.com/cfe-lab/MiCall.git@v7.7.0#egg=gotoh&subdirectory=micall/alignment', 'numpy==1.25.1', 'python-Levenshtein==0.12.0', - 'pandas==2.0.2', + 'pandas==2.2.2', 'requests==2.31.0', 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.23.0', 'pyyaml' From 6098677bdf7b7d90d6a41b2c02b6c811781db5d2 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 17:50:15 -0700 Subject: [PATCH 45/47] Fix reading of SAM files Instead of using pandas, use the builtin csv module. Pandas will not parse a SAM file correctly if it does not have regular number of columns, which some SAM files dont. --- gene_splicer/utils.py | 42 +++++++++++++++++++--------------- tests/test_utils/test_utils.py | 5 ++-- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index e2c49f9..4985ba5 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -101,9 +101,9 @@ def csv_to_bed(csvfile, target_name='HXB2', offset_start=0, offset_stop=0): }) -def split_cigar(row): +def split_cigar(string): pattern = re.compile(r'(\d+)([A-Z])') - cigar = re.findall(pattern, row[5]) + cigar = re.findall(pattern, string) return cigar @@ -214,11 +214,12 @@ def modify_annot(annot): def splice_genes(query, target, samfile, annotation): results = {} - for i, row in samfile.iterrows(): + for i, row in enumerate(samfile): # Subtract 1 to convert target position to zero-base target_pos = int(row[3]) - 1 query_pos = None - for size, op in row['cigar']: + cigar = row[5] + for size, op in split_cigar(cigar): size = int(size) # logger.debug(f'size: {size}, op: {op}') # logger.debug(f'target_pos: {target_pos}, query_pos: {query_pos}') @@ -277,11 +278,12 @@ def coords_to_genes(coords, query): def splice_aligned_genes(query, target, samfile, annotation): results = {} sequences = {} - for i, row in samfile.iterrows(): + for i, row in enumerate(samfile): # Subtract 1 to convert target position to zero-base target_pos = int(row[3]) - 1 query_pos = None - for size, op in row['cigar']: + cigar = row[5] + for size, op in split_cigar(cigar): # print(f'size: {size}, op: {op}') # print(f'target_pos: {target_pos}, query_pos: {query_pos}') size = int(size) @@ -334,19 +336,17 @@ def splice_aligned_genes(query, target, samfile, annotation): return results, sequences -def load_samfile(samfile_path): - # Open the SAM file and find the starting point for data +def load_samfile(samfile_path: Path) -> List[List[str]]: with open(samfile_path, 'r') as file: - # Skip meta fields - lines = file.readlines() - data_start_index = 0 - for i, line in enumerate(lines): - if not line.startswith('@'): - data_start_index = i - break - - result = pd.read_table(samfile_path, skiprows=data_start_index, header=None) - result['cigar'] = result.apply(split_cigar, axis=1) + reader = csv.reader(file, delimiter='\t') + + result = [] + for row in reader: + # Skip header lines (lines starting with '@') + if row[0].startswith('@'): + continue + result.append(row) + return result @@ -594,12 +594,16 @@ def generate_table_precursor_2(hivseqinr_resultsfile, filtered_file, def get_softclipped_region(query, alignment, alignment_path): try: - size, op = alignment.iloc[0]['cigar'][0] + first_match = alignment[0] except IndexError: logger.warning('No alignment in %s!', alignment_path) return + + cigar = first_match[5] + size, op = split_cigar(cigar)[0] if op != 'S': return + size = int(size) return query[:size] diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index d378d2c..28d0adc 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -16,7 +16,8 @@ def test_get_softclip_start(): # Normally this alignment would be generated by a separate function aln_path = example / 'alignment.sam' aln = utils.load_samfile(aln_path) - size, op = aln.iloc[0]['cigar'][0] + cigar = aln[0][5] + size, op = utils.split_cigar(cigar)[0] size = int(size) query_fasta = Fasta(example / 'query.fasta') query_sequence = None @@ -110,4 +111,4 @@ def test_getSamplesFromCascade(): samples = utils.get_samples_from_cascade(cascade) assert len(samples) == 10 for i in range(10): - assert samples[str(i)] == i \ No newline at end of file + assert samples[str(i)] == i From 9f874dac7e0f57416e2ddc685c527d671d679ef6 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 17:58:11 -0700 Subject: [PATCH 46/47] Add missing UnknownNucleotide CFEIntact defect code --- gene_splicer/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index 4985ba5..e7693ae 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -405,6 +405,7 @@ def align(target_seq, return alignment_path HIVINTACT_ERRORS_TABLE = [ + 'UnknownNucleotide', 'NonHIV', 'LongDeletion', 'InternalInversion', From 92c8d39599925898ccf9f539ef8599449ddd0290 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 29 Jul 2024 17:59:19 -0700 Subject: [PATCH 47/47] Add missing SequenceDivergence CFEIntact defect code --- gene_splicer/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index e7693ae..91cb81a 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -423,6 +423,7 @@ def align(target_seq, 'Frameshift', 'MutatedStopCodon', 'MutatedStartCodon', + 'SequenceDivergence', ] def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> Iterable[Tuple[str, str]]: