diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index a4e8adf..8a9e0d6 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -24,6 +24,7 @@ jobs: echo /opt/minimap2-2.17_x64-linux >> $GITHUB_PATH - name: Install dependencies run: | + sudo apt install -qq mafft ncbi-blast+ python -m pip install --upgrade pip pipenv pipenv install --dev - name: Test with pytest diff --git a/Pipfile b/Pipfile index a039031..d46c6eb 100644 --- a/Pipfile +++ b/Pipfile @@ -7,9 +7,10 @@ name = "pypi" gotoh = {subdirectory = "micall/alignment", ref = "v7.7.0", git = "https://github.com/cfe-lab/MiCall.git"} numpy = "==1.25.1" python-levenshtein = "==0.12.0" -pandas = "==2.0.2" +pandas = "==2.2.2" requests = "==2.31.0" pyyaml = "*" +cfeintact = {ref = "v1.23.0", git = "https://github.com/cfe-lab/CFEIntact"} [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 5bc34d3..bcf2914 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "73432848a2ff11546bd4d06f2cabd2c46876a987e2d0a9e8c0d379cdfd9eb4f5" + "sha256": "627846a347fac9a0d82e30a61f395bc23a45f30344dcf8144eeb2abebc84d9d0" }, "pipfile-spec": 6, "requires": { @@ -16,107 +16,170 @@ ] }, "default": { + "aligntools": { + "git": "git+https://github.com/cfe-lab/aligntools.git", + "ref": "94c65f42b8b2008de4259c837eda4e9378636cb3" + }, + "biopython": { + "hashes": [ + "sha256:0c425a39871f652598f502671aa5f6b5125475a91333a368a47f9c611ca96db1", + "sha256:25f4ef193a307e403eb532e8f060b818e2d75f65803a2b0f4e645b0cae131b4e", + "sha256:2cb8e839ab472244b6082635ad1df67c94c05df0bd02a023103ed00ea66c4d20", + "sha256:2d4ed30aebd96b4aadeb1f04adce92795c696f5bd56d1fd45517b89059918dd4", + "sha256:2dc2e77490725060330003f73b6b7d5172f8bc160f180de5877a2e899ad999d4", + "sha256:3c8beded38884abae4c74cb6ce54142da670273fd0b2919bd0f84f6e34d3056b", + "sha256:4f39b38e7625c33384954130d90f19984e91cb5af64e2fb128603359f86884ad", + "sha256:507ac3956f3107e77fee362ecb048dafb5f97cbcf110012d091418430c3227c7", + "sha256:52b6098f47d6b90fc8a5e8579b81ee50047e9108f0976e69c891ae0c4817e42d", + "sha256:60fbe6f996e8a6866a42698c17e552127d99a9aab3259d6249fbaabd0e0cc7b4", + "sha256:61765b71f84814a1eeb55ab222f43330aa7ad3e55ab91e8b444706149c67a281", + "sha256:7b69d59f9a762c5bb5f77ed03f197dad05ebd702c34d2cae7be98f1f30e04089", + "sha256:7f4c746825721ec367c2f2d6a8cda3bc6495a1e084e5b2fbab26e9467706603f", + "sha256:894ee7533cca7f5f9769e2595fbe7b0dba5018f39a2170753d101a13e7585ff4", + "sha256:89ef3967f5a88b5bb6344bef75ae83386de53fed3966d5c8c334ad885f8db08a", + "sha256:9e3f7476fd81f31e048965d7be2826f018686e870d96870f440b609002953954", + "sha256:9fbd4b67d3e71b0d716a1712ab8b4e57981c6549ba17ce5626ffa8976d170da7", + "sha256:b51ef31bfb79872a182a85b4113625e1b553c024bb1586c72ac98b479f8d8fe4", + "sha256:b64575363bb2344073c949fd69a0bf3ea015b447aa1494e4813376855b479118", + "sha256:ba58a6d76288333c5f178a426116953fa68204bd0cfc401694087dd4f96d0059", + "sha256:c792508988fc3ccf18eaae2a826c9cd97f1c27fb55bb87bdce6a101fee9f5a0c", + "sha256:ca8d6a88b9a9718074b3f5b450f9ea5adf7112a7dbaed55d82d5b623f5859a01", + "sha256:ecff2fcf5da29b600474c0bfcdbbac0f98b25e22fe60a853d0ee798c00f7396c", + "sha256:ee3566f6dc3acf20e238540daf896f0af20cff531521bf41fdf5143f73e209ae", + "sha256:f4c1c9ad7da9eaf8d8f4515bf931a7f6548a468e7ef29b8429e31aaff2d95f4c" + ], + "markers": "python_version >= '3.9'", + "version": "==1.84" + }, "certifi": { "hashes": [ - "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7", - "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716" + "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b", + "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90" ], "markers": "python_version >= '3.6'", - "version": "==2023.5.7" + "version": "==2024.7.4" + }, + "cfeintact": { + "git": "https://github.com/cfe-lab/CFEIntact", + "ref": "70031d6aff59249eb61495d2c63ae467bef6c408" }, "charset-normalizer": { "hashes": [ - "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96", - "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c", - "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710", - "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706", - "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020", - "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252", - "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad", - "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329", - "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a", - "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f", - "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6", - "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4", - "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a", - "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46", - "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2", - "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23", - "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", - "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd", - "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982", - "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10", - "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2", - "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea", - "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09", - "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5", - "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149", - "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489", - "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9", - "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80", - "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592", - "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3", - "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6", - "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed", - "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c", - "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200", - "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", - "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e", - "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d", - "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", - "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623", - "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669", - "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3", - "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa", - "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9", - "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2", - "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f", - "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1", - "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4", - "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a", - "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8", - "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3", - "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029", - "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f", - "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959", - "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22", - "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7", - "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952", - "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346", - "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e", - "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d", - "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299", - "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd", - "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a", - "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3", - "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037", - "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94", - "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c", - "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858", - "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a", - "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449", - "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c", - "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918", - "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1", - "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c", - "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", - "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" ], "markers": "python_full_version >= '3.7.0'", - "version": "==3.2.0" + "version": "==3.3.2" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" }, "gotoh": { "git": "https://github.com/cfe-lab/MiCall.git", + "markers": "python_version >= '3.8' and python_version < '4.0'", "ref": "f1687e7b7c7f1f3a6f3cb63107f1cf9b2b210f26", "subdirectory": "micall/alignment" }, "idna": { "hashes": [ - "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", - "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" ], "markers": "python_version >= '3.5'", - "version": "==3.4" + "version": "==3.7" }, "numpy": { "hashes": [ @@ -147,46 +210,52 @@ "sha256:fd67b306320dcadea700a8f79b9e671e607f8696e98ec255915c0c6d6b818503" ], "index": "pypi", + "markers": "python_version >= '3.9'", "version": "==1.25.1" }, "pandas": { "hashes": [ - "sha256:02755de164da6827764ceb3bbc5f64b35cb12394b1024fdf88704d0fa06e0e2f", - "sha256:0a1e0576611641acde15c2322228d138258f236d14b749ad9af498ab69089e2d", - "sha256:1eb09a242184092f424b2edd06eb2b99d06dc07eeddff9929e8667d4ed44e181", - "sha256:30a89d0fec4263ccbf96f68592fd668939481854d2ff9da709d32a047689393b", - "sha256:50e451932b3011b61d2961b4185382c92cc8c6ee4658dcd4f320687bb2d000ee", - "sha256:51a93d422fbb1bd04b67639ba4b5368dffc26923f3ea32a275d2cc450f1d1c86", - "sha256:598e9020d85a8cdbaa1815eb325a91cfff2bb2b23c1442549b8a3668e36f0f77", - "sha256:66d00300f188fa5de73f92d5725ced162488f6dc6ad4cecfe4144ca29debe3b8", - "sha256:69167693cb8f9b3fc060956a5d0a0a8dbfed5f980d9fd2c306fb5b9c855c814c", - "sha256:6d6d10c2142d11d40d6e6c0a190b1f89f525bcf85564707e31b0a39e3b398e08", - "sha256:713f2f70abcdade1ddd68fc91577cb090b3544b07ceba78a12f799355a13ee44", - "sha256:7376e13d28eb16752c398ca1d36ccfe52bf7e887067af9a0474de6331dd948d2", - "sha256:77550c8909ebc23e56a89f91b40ad01b50c42cfbfab49b3393694a50549295ea", - "sha256:7b21cb72958fc49ad757685db1919021d99650d7aaba676576c9e88d3889d456", - "sha256:9ebb9f1c22ddb828e7fd017ea265a59d80461d5a79154b49a4207bd17514d122", - "sha256:a18e5c72b989ff0f7197707ceddc99828320d0ca22ab50dd1b9e37db45b010c0", - "sha256:a6b5f14cd24a2ed06e14255ff40fe2ea0cfaef79a8dd68069b7ace74bd6acbba", - "sha256:b42b120458636a981077cfcfa8568c031b3e8709701315e2bfa866324a83efa8", - "sha256:c4af689352c4fe3d75b2834933ee9d0ccdbf5d7a8a7264f0ce9524e877820c08", - "sha256:c7319b6e68de14e6209460f72a8d1ef13c09fb3d3ef6c37c1e65b35d50b5c145", - "sha256:cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5", - "sha256:dd46bde7309088481b1cf9c58e3f0e204b9ff9e3244f441accd220dd3365ce7c", - "sha256:dd5476b6c3fe410ee95926873f377b856dbc4e81a9c605a0dc05aaccc6a7c6c6", - "sha256:e69140bc2d29a8556f55445c15f5794490852af3de0f609a24003ef174528b79", - "sha256:f908a77cbeef9bbd646bd4b81214cbef9ac3dda4181d5092a4aa9797d1bc7774" + "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863", + "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2", + "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1", + "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad", + "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db", + "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76", + "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51", + "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32", + "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08", + "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b", + "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4", + "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921", + "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288", + "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee", + "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0", + "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24", + "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99", + "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151", + "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd", + "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce", + "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57", + "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef", + "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54", + "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a", + "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238", + "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23", + "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772", + "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce", + "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad" ], "index": "pypi", - "version": "==2.0.2" + "markers": "python_version >= '3.9'", + "version": "==2.2.2" }, "python-dateutil": { "hashes": [ - "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", - "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", - "version": "==2.8.2" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.0.post0" }, "python-levenshtein": { "hashes": [ @@ -197,14 +266,16 @@ }, "pytz": { "hashes": [ - "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588", - "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb" + "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", + "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319" ], - "version": "==2023.3" + "version": "==2024.1" }, "pyyaml": { "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", @@ -212,7 +283,10 @@ "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", @@ -220,11 +294,15 @@ "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", @@ -237,7 +315,9 @@ "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", + "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", @@ -246,6 +326,7 @@ "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" ], "index": "pypi", + "markers": "python_version >= '3.6'", "version": "==6.0.1" }, "requests": { @@ -254,115 +335,140 @@ "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==2.31.0" }, + "scipy": { + "hashes": [ + "sha256:076c27284c768b84a45dcf2e914d4000aac537da74236a0d45d82c6fa4b7b3c0", + "sha256:07e179dc0205a50721022344fb85074f772eadbda1e1b3eecdc483f8033709b7", + "sha256:176c6f0d0470a32f1b2efaf40c3d37a24876cebf447498a4cefb947a79c21e9d", + "sha256:42470ea0195336df319741e230626b6225a740fd9dce9642ca13e98f667047c0", + "sha256:4c4161597c75043f7154238ef419c29a64ac4a7c889d588ea77690ac4d0d9b20", + "sha256:5b083c8940028bb7e0b4172acafda6df762da1927b9091f9611b0bcd8676f2bc", + "sha256:64b2ff514a98cf2bb734a9f90d32dc89dc6ad4a4a36a312cd0d6327170339eb0", + "sha256:65df4da3c12a2bb9ad52b86b4dcf46813e869afb006e58be0f516bc370165159", + "sha256:687af0a35462402dd851726295c1a5ae5f987bd6e9026f52e9505994e2f84ef6", + "sha256:6a9c9a9b226d9a21e0a208bdb024c3982932e43811b62d202aaf1bb59af264b1", + "sha256:6d056a8709ccda6cf36cdd2eac597d13bc03dba38360f418560a93050c76a16e", + "sha256:7d3da42fbbbb860211a811782504f38ae7aaec9de8764a9bef6b262de7a2b50f", + "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484", + "sha256:94c164a9e2498e68308e6e148646e486d979f7fcdb8b4cf34b5441894bdb9caf", + "sha256:9e3154691b9f7ed73778d746da2df67a19d046a6c8087c8b385bc4cdb2cfca74", + "sha256:9eee2989868e274aae26125345584254d97c56194c072ed96cb433f32f692ed8", + "sha256:a01cc03bcdc777c9da3cfdcc74b5a75caffb48a6c39c8450a9a05f82c4250a14", + "sha256:a7d46c3e0aea5c064e734c3eac5cf9eb1f8c4ceee756262f2c7327c4c2691c86", + "sha256:ad36af9626d27a4326c8e884917b7ec321d8a1841cd6dacc67d2a9e90c2f0359", + "sha256:b5923f48cb840380f9854339176ef21763118a7300a88203ccd0bdd26e58527b", + "sha256:bbc0471b5f22c11c389075d091d3885693fd3f5e9a54ce051b46308bc787e5d4", + "sha256:bff2438ea1330e06e53c424893ec0072640dac00f29c6a43a575cbae4c99b2b9", + "sha256:c40003d880f39c11c1edbae8144e3813904b10514cd3d3d00c277ae996488cdb", + "sha256:d91db2c41dd6c20646af280355d41dfa1ec7eead235642178bd57635a3f82209", + "sha256:f0a50da861a7ec4573b7c716b2ebdcdf142b66b756a0d392c236ae568b3a93fb" + ], + "markers": "python_version >= '3.10'", + "version": "==1.14.0" + }, "setuptools": { "hashes": [ - "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f", - "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235" + "sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1", + "sha256:8d243eff56d095e5817f796ede6ae32941278f542e0f941867cc05ae52b162ec" ], - "markers": "python_version >= '3.7'", - "version": "==68.0.0" + "markers": "python_version >= '3.8'", + "version": "==72.1.0" }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "tzdata": { "hashes": [ - "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a", - "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda" + "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd", + "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252" ], "markers": "python_version >= '2'", - "version": "==2023.3" + "version": "==2024.1" }, "urllib3": { "hashes": [ - "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11", - "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4" + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" ], - "markers": "python_version >= '3.7'", - "version": "==2.0.4" + "markers": "python_version >= '3.8'", + "version": "==2.2.2" } }, "develop": { "coverage": { "hashes": [ - "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f", - "sha256:06fb182e69f33f6cd1d39a6c597294cff3143554b64b9825d1dc69d18cc2fff2", - "sha256:0a5f9e1dbd7fbe30196578ca36f3fba75376fb99888c395c5880b355e2875f8a", - "sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a", - "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01", - "sha256:1e9d683426464e4a252bf70c3498756055016f99ddaec3774bf368e76bbe02b6", - "sha256:201e7389591af40950a6480bd9edfa8ed04346ff80002cec1a66cac4549c1ad7", - "sha256:245167dd26180ab4c91d5e1496a30be4cd721a5cf2abf52974f965f10f11419f", - "sha256:2aee274c46590717f38ae5e4650988d1af340fe06167546cc32fe2f58ed05b02", - "sha256:2e07b54284e381531c87f785f613b833569c14ecacdcb85d56b25c4622c16c3c", - "sha256:31563e97dae5598556600466ad9beea39fb04e0229e61c12eaa206e0aa202063", - "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a", - "sha256:3d376df58cc111dc8e21e3b6e24606b5bb5dee6024f46a5abca99124b2229ef5", - "sha256:419bfd2caae268623dd469eff96d510a920c90928b60f2073d79f8fe2bbc5959", - "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97", - "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6", - "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f", - "sha256:537891ae8ce59ef63d0123f7ac9e2ae0fc8b72c7ccbe5296fec45fd68967b6c9", - "sha256:54b896376ab563bd38453cecb813c295cf347cf5906e8b41d340b0321a5433e5", - "sha256:58c2ccc2f00ecb51253cbe5d8d7122a34590fac9646a960d1430d5b15321d95f", - "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562", - "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe", - "sha256:5e330fc79bd7207e46c7d7fd2bb4af2963f5f635703925543a70b99574b0fea9", - "sha256:61b9a528fb348373c433e8966535074b802c7a5d7f23c4f421e6c6e2f1697a6f", - "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb", - "sha256:6d040ef7c9859bb11dfeb056ff5b3872436e3b5e401817d87a31e1750b9ae2fb", - "sha256:6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1", - "sha256:7ee7d9d4822c8acc74a5e26c50604dff824710bc8de424904c0982e25c39c6cb", - "sha256:81c13a1fc7468c40f13420732805a4c38a105d89848b7c10af65a90beff25250", - "sha256:8d13c64ee2d33eccf7437961b6ea7ad8673e2be040b4f7fd4fd4d4d28d9ccb1e", - "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511", - "sha256:8fa03bce9bfbeeef9f3b160a8bed39a221d82308b4152b27d82d8daa7041fee5", - "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59", - "sha256:975d70ab7e3c80a3fe86001d8751f6778905ec723f5b110aed1e450da9d4b7f2", - "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d", - "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3", - "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4", - "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de", - "sha256:a895fcc7b15c3fc72beb43cdcbdf0ddb7d2ebc959edac9cef390b0d14f39f8a9", - "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833", - "sha256:b1c546aca0ca4d028901d825015dc8e4d56aac4b541877690eb76490f1dc8ed0", - "sha256:b29019c76039dc3c0fd815c41392a044ce555d9bcdd38b0fb60fb4cd8e475ba9", - "sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d", - "sha256:b7aa5f8a41217360e600da646004f878250a0d6738bcdc11a0a39928d7dc2050", - "sha256:b7b4c971f05e6ae490fef852c218b0e79d4e52f79ef0c8475566584a8fb3e01d", - "sha256:ba90a9563ba44a72fda2e85302c3abc71c5589cea608ca16c22b9804262aaeb6", - "sha256:cb017fd1b2603ef59e374ba2063f593abe0fc45f2ad9abdde5b4d83bd922a353", - "sha256:d22656368f0e6189e24722214ed8d66b8022db19d182927b9a248a2a8a2f67eb", - "sha256:d2c2db7fd82e9b72937969bceac4d6ca89660db0a0967614ce2481e81a0b771e", - "sha256:d39b5b4f2a66ccae8b7263ac3c8170994b65266797fb96cbbfd3fb5b23921db8", - "sha256:d62a5c7dad11015c66fbb9d881bc4caa5b12f16292f857842d9d1871595f4495", - "sha256:e7d9405291c6928619403db1d10bd07888888ec1abcbd9748fdaa971d7d661b2", - "sha256:e84606b74eb7de6ff581a7915e2dab7a28a0517fbe1c9239eb227e1354064dcd", - "sha256:eb393e5ebc85245347950143969b241d08b52b88a3dc39479822e073a1a8eb27", - "sha256:ebba1cd308ef115925421d3e6a586e655ca5a77b5bf41e02eb0e4562a111f2d1", - "sha256:ee57190f24fba796e36bb6d3aa8a8783c643d8fa9760c89f7a98ab5455fbf818", - "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4", - "sha256:f6951407391b639504e3b3be51b7ba5f3528adbf1a8ac3302b687ecababf929e", - "sha256:f75f7168ab25dd93110c8a8117a22450c19976afbc44234cbf71481094c1b850", - "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3" + "sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382", + "sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1", + "sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac", + "sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee", + "sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166", + "sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57", + "sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c", + "sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b", + "sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51", + "sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da", + "sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450", + "sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2", + "sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd", + "sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d", + "sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d", + "sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6", + "sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca", + "sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169", + "sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1", + "sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713", + "sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b", + "sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6", + "sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c", + "sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605", + "sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463", + "sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b", + "sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6", + "sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5", + "sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63", + "sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c", + "sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783", + "sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44", + "sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca", + "sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8", + "sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d", + "sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390", + "sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933", + "sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67", + "sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b", + "sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03", + "sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b", + "sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791", + "sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb", + "sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807", + "sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6", + "sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2", + "sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428", + "sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd", + "sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c", + "sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94", + "sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8", + "sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b" ], "index": "pypi", - "version": "==7.2.7" + "markers": "python_version >= '3.8'", + "version": "==7.6.0" }, "exceptiongroup": { "hashes": [ - "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5", - "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f" + "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", + "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc" ], "markers": "python_version < '3.11'", - "version": "==1.1.2" + "version": "==1.2.2" }, "iniconfig": { "hashes": [ @@ -374,27 +480,28 @@ }, "packaging": { "hashes": [ - "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61", - "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f" + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" ], - "markers": "python_version >= '3.7'", - "version": "==23.1" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "pluggy": { "hashes": [ - "sha256:c2fd55a7d7a3863cba1a013e4e2414658b1d07b6bc57b3919e0c63c9abb99849", - "sha256:d12f0c4b579b15f5e054301bb226ee85eeeba08ffec228092f8defbaa3a4c4b3" + "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", + "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669" ], - "markers": "python_version >= '3.7'", - "version": "==1.2.0" + "markers": "python_version >= '3.8'", + "version": "==1.5.0" }, "pytest": { "hashes": [ - "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32", - "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a" + "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", + "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce" ], "index": "pypi", - "version": "==7.4.0" + "markers": "python_version >= '3.8'", + "version": "==8.3.2" }, "tomli": { "hashes": [ diff --git a/README.md b/README.md index fa27f8d..718ae86 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ 1. minimap2 (https://github.com/lh3/minimap2) (must be available via commandline) 2. blast tools (ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/) 3. R and RSCRIPT (https://www.r-project.org/) +4. mafft (https://mafft.cbrc.jp/alignment/software/) ### Singularity builds * Build all singularity images inside of the `simages` folder diff --git a/Singularity b/Singularity index d938b8e..4a91529 100644 --- a/Singularity +++ b/Singularity @@ -3,7 +3,7 @@ Bootstrap: docker From: ubuntu:22.04 %help - Search proviral consensus sequences for primers, then use HIVSeqinR to + Search proviral consensus sequences for primers, then use HIVIntact to decide if the genomes are complete. This Singularity container can be run on Kive: http://cfe-lab.github.io/Kive @@ -13,7 +13,7 @@ From: ubuntu:22.04 MAINTAINER BC CfE in HIV/AIDS https://github.com/cfe-lab/ KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \ - table_precursor_csv proviral_landscape_csv hivseqinr_results_tar + table_precursor_csv proviral_landscape_csv detailed_results_tar KIVE_THREADS 1 KIVE_MEMORY 6000 @@ -28,6 +28,9 @@ From: ubuntu:22.04 fontconfig libbz2-dev liblzma-dev libssl-dev \ libffi-dev libsqlite3-dev + echo ===== Installing MAFFT ===== >/dev/null + apt-get install -y mafft + echo ===== Installing Python ===== >/dev/null apt-get install -y python3 python3-pip @@ -41,12 +44,6 @@ From: ubuntu:22.04 echo ===== Installing minimap2 ===== >/dev/null apt-get install -y minimap2 - echo ===== Installing hivseqinr ===== >/dev/null - apt-get install -y libz-dev libcurl4-openssl-dev libxml2-dev - DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y r-base - Rscript /opt/primer_finder/gene_splicer/configure_r.sh - python3 -m gene_splicer.hivseqinr /opt/hivseqinr - # Clean up apt-get remove -y wget git build-essential @@ -54,4 +51,4 @@ From: ubuntu:22.04 export LANG=en_US.UTF-8 %runscript - gene_splicer_sample --hivseqinr /opt/hivseqinr "$@" + gene_splicer_sample --hivintact "$@" diff --git a/gene_splicer/landscapes.py b/gene_splicer/landscapes.py new file mode 100644 index 0000000..9da65e7 --- /dev/null +++ b/gene_splicer/landscapes.py @@ -0,0 +1,222 @@ +import csv +import logging +import os +import re +import typing +from typing import TextIO, Mapping, Dict, Set, List, Iterable, Tuple +import argparse +import sys + +import json +import shutil +import subprocess as sp +import glob +from pathlib import Path +from csv import DictWriter, DictReader +from itertools import groupby +from operator import itemgetter + +from gene_splicer.utils import ( + iterate_hivintact_verdicts_1, + iterate_hivseqinr_verdicts_1, + LEFT_PRIMER_END, RIGHT_PRIMER_START, +) + + +logger = logging.getLogger(__name__) + + +def generate_proviral_landscape_csv_1_cont(blastn_reader: csv.DictReader, + landscape_writer: csv.DictWriter, + verdicts: Mapping[str, str], + ) -> None: + + for row in blastn_reader: + if row['qseqid'] in ['8E5LAV', 'HXB2']: + # skip the positive control rows + continue + + ref_start = int(row['sstart']) + ref_end = int(row['send']) + if ref_end <= LEFT_PRIMER_END or ref_start >= RIGHT_PRIMER_START: + # skip unspecific matches of LTR at start and end + continue + + qseqid = row['qseqid'] + try: + [run_name, sample_name, _, _] = qseqid.split('::') + except ValueError: + [run_name, sample_name] = [None, qseqid] + + is_inverted = '' + if ref_end < ref_start: + # automatically recognize inverted regions + new_end = ref_start + ref_start = ref_end + ref_end = new_end + is_inverted = 'yes' + + verdict = verdicts[qseqid] + is_defective = verdict != 'Intact' + landscape_entry = {'ref_start': ref_start, + 'ref_end': ref_end, + 'samp_name': sample_name, + 'run_name': run_name, + 'is_inverted': is_inverted, + 'is_defective': is_defective, + 'defect': verdict, + } + + landscape_writer.writerow(landscape_entry) + + +def get_hivintact_verdicts_1_map(details_dir: Path) -> Mapping[str, str]: + ret: Dict[str, str] = {} + + for [qseqid, verdict] in iterate_hivintact_verdicts_1(details_dir): + ret[qseqid] = verdict + + return ret + + +def get_hivseqinr_verdicts_1_map(details_dir: Path) -> Mapping[str, str]: + ret: Dict[str, str] = {} + + for [qseqid, verdict] in iterate_hivseqinr_verdicts_1(details_dir): + ret[qseqid] = verdict + + return ret + + +def generate_proviral_landscape_csv_1(landscape_writer: csv.DictWriter, + details_dir: Path, + ) -> None: + is_hivintact = (details_dir / "holistic.csv").exists() + if is_hivintact: + verdicts = get_hivintact_verdicts_1_map(details_dir) + blastn_path = details_dir / "blast.csv" + else: + verdicts = get_hivseqinr_verdicts_1_map(details_dir) + blastn_path = details_dir / "Results_Intermediate" / "Output_Blastn_HXB2MEGA28_tabdelim.txt" + + with blastn_path.open() as blastn_file: + if is_hivintact: + blastn_reader = DictReader(blastn_file) + else: + blastn_columns = ['qseqid', + 'qlen', + 'sseqid', + 'sgi', + 'slen', + 'qstart', + 'qend', + 'sstart', + 'send', + 'evalue', + 'bitscore', + 'length', + 'pident', + 'nident', + 'btop', + 'stitle', + 'sstrand'] + blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') + + return generate_proviral_landscape_csv_1_cont( + blastn_reader, + landscape_writer, + verdicts, + ) + + +def generate_proviral_landscape_csv(outpath: Path, is_hivintact: bool): + proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv') + + if is_hivintact: + subpath = 'hivintact*' + else: + subpath = 'hivseqinr*' + + landscape_columns = ['samp_name', 'run_name', 'ref_start', 'ref_end', 'defect', 'is_inverted', 'is_defective'] + with open(proviral_landscape_csv, 'w') as landscape_file: + landscape_writer = csv.DictWriter(landscape_file, fieldnames=landscape_columns) + landscape_writer.writeheader() + + for details_dir in outpath.glob(subpath): + generate_proviral_landscape_csv_1(landscape_writer, details_dir) + + +class UserError(RuntimeError): + def __init__(self, fmt: str, *fmt_args: object): + self.fmt = fmt + self.fmt_args = fmt_args + self.code = 1 + + +def dir_path(string: str) -> Path: + if os.path.exists(string) and os.path.isdir(string): + return Path(string) + else: + raise UserError("Path %r does not exist or is not a directory.", string) + + +def cli_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Generate Proviral Landscape CSV.") + + parser.add_argument("--details_dir", type=dir_path, required=True, + help="Directory containing details files for verdicts.") + + parser.add_argument("--output", type=argparse.FileType("wt"), required=True, + help="Output CSV file for proviral landscape.") + + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--verbose', action='store_true', + help='Increase output verbosity.') + verbosity_group.add_argument('--no-verbose', action='store_true', + help='Normal output verbosity.', default=True) + verbosity_group.add_argument('--debug', action='store_true', + help='Maximum output verbosity.') + verbosity_group.add_argument('--quiet', action='store_true', + help='Minimize output verbosity.') + + return parser + + +def main(argv: list) -> int: + parser = cli_parser() + args = parser.parse_args(argv) + if args.quiet: + logger.setLevel(logging.ERROR) + elif args.verbose: + logger.setLevel(logging.INFO) + elif args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.WARN) + + logger.debug("Start.") + + fieldnames = ['ref_start', 'ref_end', 'samp_name', 'run_name', 'is_inverted', 'is_defective', 'defect'] + + landscape_writer = csv.DictWriter(args.output, fieldnames=fieldnames) + landscape_writer.writeheader() + generate_proviral_landscape_csv_1(landscape_writer, args.details_dir) + + logger.debug("Done.") + return 0 + + +if __name__ == '__main__': + try: + rc = main(sys.argv[1:]) + except BrokenPipeError: + logger.debug("Broken pipe.") + rc = 0 + except KeyboardInterrupt: + logger.debug("Interrupted.") + rc = 1 + except UserError as e: + logger.fatal(e.fmt, *e.fmt_args) + rc = e.code + + sys.exit(rc) diff --git a/gene_splicer/primer_finder.py b/gene_splicer/primer_finder.py index dbb741e..ad3232a 100644 --- a/gene_splicer/primer_finder.py +++ b/gene_splicer/primer_finder.py @@ -1,10 +1,12 @@ import re +import subprocess from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType from csv import DictReader, DictWriter from itertools import groupby from operator import itemgetter import os from tarfile import TarFile +import cfeintact import pandas as pd from pathlib import Path @@ -77,6 +79,9 @@ def parse_args(): help="Path to HIVSeqinR source code, or download " "destination. HIVSeqinR will be skipped if this " "isn't given.") + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis.") parser.add_argument( '--nodups', action='store_false', @@ -386,8 +391,8 @@ def add_primers(row): def remove_primers(row): # Strip the primers out, convert index values from floats. newseq = row.sequence[ - int(row.fwd_sample_primer_size + row.fwd_sample_primer_start) - :-int(row.rev_sample_primer_size + row.rev_sample_primer_start)] + int(row.fwd_sample_primer_size + row.fwd_sample_primer_start): + -int(row.rev_sample_primer_size + row.rev_sample_primer_start)] row.sequence = newseq return row @@ -422,6 +427,13 @@ def archive_hivseqinr_results(working_path: Path, archive.add(result_path, result_path.name) +def archive_hivintact_results(working_path: Path, + hivintact_results_tar: typing.IO): + archive = TarFile(fileobj=hivintact_results_tar, mode='w') + for result_path in working_path.iterdir(): + archive.add(result_path, result_path.name) + + def run(contigs_csv, conseqs_csv, cascade_csv, @@ -433,7 +445,9 @@ def run(contigs_csv, sample_size=50, force_all_proviral=False, default_sample_name: str = None, - hivseqinr_results_tar: typing.IO = None): + hivseqinr_results_tar: typing.IO = None, + run_hivintact: bool = False, + hivintact_results_tar: typing.IO = None): all_samples = utils.get_samples_from_cascade(cascade_csv, default_sample_name) @@ -517,6 +531,36 @@ def run(contigs_csv, if hivseqinr_results_tar is not None: archive_hivseqinr_results(working_path, hivseqinr_results_tar) + if run_hivintact: + working_path = outpath / f'hivintact_{i}' + log_file_path = working_path / 'hiv-intact.log' + os.makedirs(working_path, exist_ok=True) + + logger = cfeintact.logger + file_handler = logging.FileHandler(log_file_path) + logger.addHandler(file_handler) + + cfeintact.check( + working_dir=working_path, + input_file=str(no_primers_fasta), + subtype="B", + check_packaging_signal=True, + check_rre=True, + check_major_splice_donor_site=True, + check_hypermut=True, + check_long_deletion=True, + check_nonhiv=True, + check_scramble=True, + check_internal_inversion=True, + check_unknown_nucleotides=True, + check_small_orfs=True, + check_distance=False, + output_csv=True, + ) + + if hivintact_results_tar is not None: + archive_hivintact_results(working_path, + hivintact_results_tar) files.append(no_primers_fasta) return files @@ -531,7 +575,8 @@ def main(): hivseqinr=args.hivseqinr, nodups=args.nodups, split=args.split, - sample_size=args.sample_size) + sample_size=args.sample_size, + run_hivintact=args.hivintact) return {'fasta_files': fasta_files, 'args': args} diff --git a/gene_splicer/primer_finder_class.py b/gene_splicer/primer_finder_class.py index d5f8cd5..a4f5cf9 100644 --- a/gene_splicer/primer_finder_class.py +++ b/gene_splicer/primer_finder_class.py @@ -144,7 +144,7 @@ def get_slices(self): hxb2_slice = utils.hxb2[self.hxb2_start - self.validation_size:self.hxb2_end] if len(sample_slice) == 0: - logger.debug(\ + logger.debug( 'Sample slice size is 0! \n' f'start: {self.start} \n' f'end: {self.end} \n' diff --git a/gene_splicer/sample.py b/gene_splicer/sample.py index 5e49fb9..d00e7fa 100644 --- a/gene_splicer/sample.py +++ b/gene_splicer/sample.py @@ -7,6 +7,7 @@ import gene_splicer.gene_splicer as gene_splicer import gene_splicer.primer_finder as primer_finder import gene_splicer.utils as utils +import gene_splicer.landscapes as landscapes def parse_args(): @@ -43,8 +44,9 @@ def parse_args(): parser.add_argument('proviral_landscape_csv', help='Data for proviral landscape plot', type=FileType('w')) - parser.add_argument('hivseqinr_results_tar', - help="Archive file with HIVSeqinR's final results folder.", + parser.add_argument('detailed_results_tar', + help="Archive file with HIVSeqinR's final results " + "folder, or HIVIntact's results.", type=FileType('wb')) parser.add_argument( '-p', @@ -57,6 +59,9 @@ def parse_args(): help="Path to HIVSeqinR source code, or download " "destination. HIVSeqinR will be skipped if this " "isn't given.") + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis.") parser.add_argument( '--nodups', action='store_false', @@ -69,7 +74,7 @@ def parse_args(): help='To avoid memory issues in hivseqinr, split the resulting ' 'qc-passed sequences into this number of fastas, each will be ' 'processed sequentially and then all will be merged into the ' - 'final result') + 'final result. Obsolete for HIVIntact.') return parser.parse_args() @@ -88,10 +93,16 @@ def main(): info_reader = DictReader(args.sample_info_csv) sample_info: dict = next(info_reader) run_name = sample_info.get('run_name', 'kive_run') + if args.hivintact: + hivseqinr_results_tar = None + hivintact_results_tar = args.detailed_results_tar + else: + hivseqinr_results_tar = args.detailed_results_tar + hivintact_results_tar = None fasta_files = primer_finder.run(contigs_csv=args.contigs_csv, conseqs_csv=args.conseqs_csv, cascade_csv=args.cascade_csv, - hivseqinr_results_tar=args.hivseqinr_results_tar, + hivseqinr_results_tar=hivseqinr_results_tar, name=run_name, outpath=outpath, hivseqinr=args.hivseqinr, @@ -99,11 +110,13 @@ def main(): split=args.split, sample_size=args.sample_size, force_all_proviral=True, - default_sample_name=sample_info['sample']) + default_sample_name=sample_info['sample'], + run_hivintact=args.hivintact, + hivintact_results_tar=hivintact_results_tar) for file in fasta_files: gene_splicer.run(file, outdir=outpath) utils.generate_table_precursor(name=run_name, outpath=outpath) - utils.generate_proviral_landscape_csv(outpath) + landscapes.generate_proviral_landscape_csv(outpath, is_hivintact=args.hivintact) copy_output(outpath / 'outcome_summary.csv', args.outcome_summary_csv) copy_output(outpath / (run_name + '_conseqs_primer_analysis.csv'), args.conseqs_primers_csv) diff --git a/gene_splicer/study_summary.py b/gene_splicer/study_summary.py index 74a5652..45efe1f 100644 --- a/gene_splicer/study_summary.py +++ b/gene_splicer/study_summary.py @@ -43,6 +43,10 @@ def parse_args(): 'the runs in samples_csv. Any samples not found ' 'in samples.csv will guess the participant id ' 'from the first part of the sample name.') + parser.add_argument('--hivintact', + action='store_true', + help="Launch the HIVIntact analysis instead of " + "HIVSeqinR.") return parser.parse_args() @@ -197,7 +201,7 @@ def write_warnings(self, report_file: typing.TextIO, limit: int = None): file=report_file) -def run_gene_splicer(run_path: Path, outcome_folder: Path): +def run_gene_splicer(run_path: Path, outcome_folder: Path, run_hivintact: bool): version_results_path = run_path / 'Results' / 'version_7.14' assert version_results_path.exists(), version_results_path denovo_path = version_results_path / 'denovo' @@ -214,11 +218,14 @@ def run_gene_splicer(run_path: Path, outcome_folder: Path): pipeline_args = [python_path, '-m', 'gene_splicer.pipeline', '--outpath', str(outcome_folder), - '--hivseqinr', str(hivseqinr_path), contigs_path, conseq_path, cascade_path, short_run_name] + if run_hivintact: + pipeline_args.append('--hivintact') + else: + pipeline_args.append(f'--hivseqinr={hivseqinr_path}') try: with log_path.open('w') as log_file: run(pipeline_args, @@ -259,7 +266,7 @@ def main(): print('Missing denovo results:', run_path) continue else: - run_gene_splicer(run_path, outcome_path.parent) + run_gene_splicer(run_path, outcome_path.parent, args.hivintact) assert outcome_path.exists(), outcome_path print('.', end='', flush=True) dots_printed = True diff --git a/gene_splicer/utils.py b/gene_splicer/utils.py index b01b5bd..91cb81a 100644 --- a/gene_splicer/utils.py +++ b/gene_splicer/utils.py @@ -3,14 +3,18 @@ import os import re import typing +from typing import TextIO, Mapping, Dict, Set, List, Iterable, Tuple import yaml +import json import shutil import subprocess as sp import pandas as pd import glob from pathlib import Path from csv import DictWriter, DictReader +from itertools import groupby +from operator import itemgetter logger = logging.getLogger(__name__) @@ -97,9 +101,9 @@ def csv_to_bed(csvfile, target_name='HXB2', offset_start=0, offset_stop=0): }) -def split_cigar(row): +def split_cigar(string): pattern = re.compile(r'(\d+)([A-Z])') - cigar = re.findall(pattern, row[5]) + cigar = re.findall(pattern, string) return cigar @@ -210,11 +214,12 @@ def modify_annot(annot): def splice_genes(query, target, samfile, annotation): results = {} - for i, row in samfile.iterrows(): + for i, row in enumerate(samfile): # Subtract 1 to convert target position to zero-base target_pos = int(row[3]) - 1 query_pos = None - for size, op in row['cigar']: + cigar = row[5] + for size, op in split_cigar(cigar): size = int(size) # logger.debug(f'size: {size}, op: {op}') # logger.debug(f'target_pos: {target_pos}, query_pos: {query_pos}') @@ -273,11 +278,12 @@ def coords_to_genes(coords, query): def splice_aligned_genes(query, target, samfile, annotation): results = {} sequences = {} - for i, row in samfile.iterrows(): + for i, row in enumerate(samfile): # Subtract 1 to convert target position to zero-base target_pos = int(row[3]) - 1 query_pos = None - for size, op in row['cigar']: + cigar = row[5] + for size, op in split_cigar(cigar): # print(f'size: {size}, op: {op}') # print(f'target_pos: {target_pos}, query_pos: {query_pos}') size = int(size) @@ -330,9 +336,17 @@ def splice_aligned_genes(query, target, samfile, annotation): return results, sequences -def load_samfile(samfile_path): - result = pd.read_table(samfile_path, skiprows=2, header=None) - result['cigar'] = result.apply(split_cigar, axis=1) +def load_samfile(samfile_path: Path) -> List[List[str]]: + with open(samfile_path, 'r') as file: + reader = csv.reader(file, delimiter='\t') + + result = [] + for row in reader: + # Skip header lines (lines starting with '@') + if row[0].startswith('@'): + continue + result.append(row) + return result @@ -390,6 +404,92 @@ def align(target_seq, else: return alignment_path +HIVINTACT_ERRORS_TABLE = [ + 'UnknownNucleotide', + 'NonHIV', + 'LongDeletion', + 'InternalInversion', + 'Scramble', + 'APOBECHypermutation', + 'MajorSpliceDonorSiteMutated', + 'PackagingSignalDeletion', + 'PackagingSignalNotComplete', + 'RevResponseElementDeletion', + 'MisplacedORF', + 'WrongORFNumber', + 'Deletion', + 'Insertion', + 'InternalStop', + 'Frameshift', + 'MutatedStopCodon', + 'MutatedStartCodon', + 'SequenceDivergence', + ] + +def iterate_hivintact_verdicts_1(directory: Path, intact: Set[str] = set()) -> Iterable[Tuple[str, str]]: + intact = set() + + def get_verdict(SEQID: str, all_defects) -> Tuple[str, str]: + if all_defects: + ordered = sorted(all_defects, key=HIVINTACT_ERRORS_TABLE.index) + verdict = ordered[0] + else: + verdict = "Intact" + + return (SEQID, verdict) + + with open(os.path.join(directory, 'holistic.csv'), 'r') as f: + reader = csv.DictReader(f) + for row in reader: + if row["intact"] == "True": + intact.add(row["qseqid"]) + SEQID = row["qseqid"] + yield get_verdict(SEQID, all_defects=[]) + + with open(os.path.join(directory, 'defects.csv'), 'r') as f: + reader = csv.DictReader(f) + grouped = groupby(reader, key=itemgetter('qseqid')) + for sequence_name, defects in grouped: + if sequence_name not in intact: + all_defects = [defect['code'] for defect in defects] + yield get_verdict(sequence_name, all_defects) + + +def iterate_hivintact_verdicts(outpath: Path) -> Iterable[Tuple[str, str]]: + intact: Set[str] = set() + + for directory in outpath.glob('hivintact*'): + yield from iterate_hivintact_verdicts_1(directory, intact) + + +def get_hivintact_verdicts(name, outpath): + column_names = ['SEQID', 'MyVerdict'] + data = iterate_hivintact_verdicts(outpath) + return pd.DataFrame(data, columns=column_names) + + +def iterate_hivseqinr_verdicts_1(directory: Path) -> Iterable[Tuple[str, str]]: + path = directory / 'Output_MyBigSummary_DF_FINAL.csv' + if not path.is_file(): + return + + with path.open() as fd: + reader = csv.DictReader(fd) + for row in reader: + yield (row["SEQID"], row["MyVerdict"]) + + +def iterate_hivseqinr_verdicts(outpath: Path) -> Iterable[Tuple[str, str]]: + seqinr_paths = outpath.glob('hivseqinr*/Results_Final/Output_MyBigSummary_DF_FINAL.csv') + for path in seqinr_paths: + yield from iterate_hivseqinr_verdicts_1(path) + + +def get_hivseqinr_verdicts(name, outpath): + column_names = ['SEQID', 'MyVerdict'] + data = iterate_hivseqinr_verdicts(outpath) + return pd.DataFrame(data, columns=column_names) + def generate_table_precursor(name, outpath, add_columns=None): # Output csv @@ -398,24 +498,21 @@ def generate_table_precursor(name, outpath, add_columns=None): # Load filtered sequences filtered_path = outpath / (name + '_filtered.csv') filtered = pd.read_csv(filtered_path) - # Load hivseqinr data - seqinr_paths = glob.glob( - str(outpath / 'hivseqinr*' / 'Results_Final' / - 'Output_MyBigSummary_DF_FINAL.csv')) - parts = [] - for path in seqinr_paths: - if not os.path.isfile(path): - continue - part = pd.read_csv(path) - parts.append(part) - # seqinr = pd.read_csv(seqinr_path) + # Load hivseqinr data or HIVIntact results + + if any(outpath.glob('hivintact*')): + results = get_hivintact_verdicts(name, outpath) + elif any(outpath.glob('hivseqinr*')): + results = get_hivseqinr_verdicts(name, outpath) + else: + raise RuntimeError("Neither HIVIntact nor HIVSeqinR directory exists.") + try: - seqinr = pd.concat(parts) # Assign new columns based on split - seqinr[['name', 'sample', 'reference', - 'seqtype']] = seqinr['SEQID'].str.split('::', expand=True) + results[['name', 'sample', 'reference', + 'seqtype']] = results['SEQID'].str.split('::', expand=True) # Merge - merged = seqinr.merge(filtered, on='sample') + merged = results.merge(filtered, on='sample') except ValueError: with precursor_path.open('w') as output_file: writer = DictWriter(output_file, @@ -448,7 +545,7 @@ def generate_table_precursor(name, outpath, add_columns=None): if add_columns: for key, val in add_columns.items(): merged[key] = val - if parts: + if not results.empty: merged[['sample', 'sequence', 'MyVerdict'] + genes_of_interest].to_csv( precursor_path, index=False) else: @@ -497,89 +594,18 @@ def generate_table_precursor_2(hivseqinr_resultsfile, filtered_file, return table_precursorfile -def generate_proviral_landscape_csv(outpath): - proviral_landscape_csv = os.path.join(outpath, 'proviral_landscape.csv') - landscape_rows = [] - - table_precursor_csv = os.path.join(outpath, 'table_precursor.csv') - blastn_csv = glob.glob( - os.path.join( - outpath, - 'hivseqinr*', - 'Results_Intermediate', - 'Output_Blastn_HXB2MEGA28_tabdelim.txt' - ) - )[0] - - blastn_columns = ['qseqid', - 'qlen', - 'sseqid', - 'sgi', - 'slen', - 'qstart', - 'qend', - 'sstart', - 'send', - 'evalue', - 'bitscore', - 'length', - 'pident', - 'nident', - 'btop', - 'stitle', - 'sstrand'] - with open(blastn_csv, 'r') as blastn_file: - blastn_reader = DictReader(blastn_file, fieldnames=blastn_columns, delimiter='\t') - for row in blastn_reader: - if row['qseqid'] in ['8E5LAV', 'HXB2']: - # skip the positive control rows - continue - ref_start = int(row['sstart']) - ref_end = int(row['send']) - if ref_end <= LEFT_PRIMER_END or ref_start >= RIGHT_PRIMER_START: - # skip unspecific matches of LTR at start and end - continue - [run_name, sample_name, _, _] = row['qseqid'].split('::') - is_inverted = '' - if ref_end < ref_start: - # automatically recognize inverted regions - new_end = ref_start - ref_start = ref_end - ref_end = new_end - is_inverted = 'yes' - landscape_entry = {'ref_start': ref_start, - 'ref_end': ref_end, - 'samp_name': sample_name, - 'run_name': run_name, - 'is_inverted': is_inverted, - 'is_defective': ''} - # is_defective is empty for now, will be filled manually - landscape_rows.append(landscape_entry) - - with open(table_precursor_csv, 'r') as tab_prec: - tab_prec_reader = DictReader(tab_prec) - for row in tab_prec_reader: - samp_name = row['sample'] - verdict = row['MyVerdict'] - for entry in landscape_rows: - if entry['samp_name'] == samp_name: - entry['defect'] = verdict - - landscape_columns = ['samp_name', 'run_name', 'ref_start', 'ref_end', 'defect', 'is_inverted', 'is_defective'] - with open(proviral_landscape_csv, 'w') as landscape_file: - landscape_writer = csv.DictWriter(landscape_file, fieldnames=landscape_columns) - landscape_writer.writeheader() - landscape_writer.writerows(landscape_rows) - - def get_softclipped_region(query, alignment, alignment_path): try: - size, op = alignment.iloc[0]['cigar'][0] + first_match = alignment[0] except IndexError: logger.warning('No alignment in %s!', alignment_path) return + + cigar = first_match[5] + size, op = split_cigar(cigar)[0] if op != 'S': return + size = int(size) return query[:size] diff --git a/setup.py b/setup.py index 069adcc..813dd68 100644 --- a/setup.py +++ b/setup.py @@ -8,8 +8,9 @@ 'gotoh @ git+https://github.com/cfe-lab/MiCall.git@v7.7.0#egg=gotoh&subdirectory=micall/alignment', 'numpy==1.25.1', 'python-Levenshtein==0.12.0', - 'pandas==2.0.2', + 'pandas==2.2.2', 'requests==2.31.0', + 'cfeintact @ git+https://github.com/cfe-lab/CFEIntact.git@v1.23.0', 'pyyaml' ], package_data={ diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index d378d2c..28d0adc 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -16,7 +16,8 @@ def test_get_softclip_start(): # Normally this alignment would be generated by a separate function aln_path = example / 'alignment.sam' aln = utils.load_samfile(aln_path) - size, op = aln.iloc[0]['cigar'][0] + cigar = aln[0][5] + size, op = utils.split_cigar(cigar)[0] size = int(size) query_fasta = Fasta(example / 'query.fasta') query_sequence = None @@ -110,4 +111,4 @@ def test_getSamplesFromCascade(): samples = utils.get_samples_from_cascade(cascade) assert len(samples) == 10 for i in range(10): - assert samples[str(i)] == i \ No newline at end of file + assert samples[str(i)] == i