diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index cb678d5..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,1722 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "addr2line" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "anstream" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" - -[[package]] -name = "anstyle-parse" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" -dependencies = [ - "anstyle", - "windows-sys", -] - -[[package]] -name = "anyhow" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" - -[[package]] -name = "approx" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" -dependencies = [ - "num-traits", -] - -[[package]] -name = "async-compression" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" -dependencies = [ - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "autocfg" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" - -[[package]] -name = "backtrace" -version = "0.3.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "bio" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ffe04b598449a4486a45bc15884537a01cb2015d978ce66e51a47b2740ddbe" -dependencies = [ - "anyhow", - "approx", - "bio-types", - "bit-set", - "bv", - "bytecount", - "csv", - "custom_derive", - "editdistancek", - "enum-map", - "fxhash", - "itertools", - "itertools-num", - "lazy_static", - "multimap", - "ndarray", - "newtype_derive", - "num-integer", - "num-traits", - "ordered-float", - "petgraph", - "rand", - "regex", - "serde", - "serde_derive", - "statrs", - "strum", - "strum_macros", - "thiserror", - "triple_accel", - "vec_map", -] - -[[package]] -name = "bio-types" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc7edd677651969cc262a8dfb870f0c2266c3ceeaf863d742982e39699ff460" -dependencies = [ - "derive-new", - "lazy_static", - "regex", - "strum_macros", - "thiserror", -] - -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec 0.6.3", -] - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - -[[package]] -name = "bit-vec" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22" - -[[package]] -name = "bitflags" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bstr" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" -dependencies = [ - "memchr", - "serde", -] - -[[package]] -name = "bv" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8834bb1d8ee5dc048ee3124f2c7c1afcc6bc9aed03f11e9dfd8c69470a5db340" -dependencies = [ - "feature-probe", - "serde", -] - -[[package]] -name = "bytecount" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" - -[[package]] -name = "bytemuck" -version = "1.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "cc" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c74f2155653c90b04f25b2a47a8a631360cb908f92a772695f430c7e31052" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "clap" -version = "4.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_derive" -version = "4.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.71", -] - -[[package]] -name = "clap_lex" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" - -[[package]] -name = "colorchoice" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" - -[[package]] -name = "cpufeatures" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] - -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" - -[[package]] -name = "derive-new" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.71", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "editdistancek" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e02df23d5b1c6f9e69fa603b890378123b93073df998a21e6e33b9db0a32613" - -[[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" - -[[package]] -name = "enum-map" -version = "2.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" -dependencies = [ - "enum-map-derive", -] - -[[package]] -name = "enum-map-derive" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.71", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - -[[package]] -name = "feature-probe" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" - -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "flate2" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - -[[package]] -name = "gen" -version = "0.1.0" -dependencies = [ - "bio", - "clap", - "include_dir", - "noodles", - "rusqlite", - "rusqlite_migration", - "sha2", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashlink" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" -dependencies = [ - "hashbrown", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "include_dir" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" -dependencies = [ - "include_dir_macros", -] - -[[package]] -name = "include_dir_macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" -dependencies = [ - "proc-macro2", - "quote", -] - -[[package]] -name = "indexmap" -version = "2.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "is_terminal_polyfill" -version = "1.70.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools-num" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a872a22f9e6f7521ca557660adb96dd830e54f0f490fa115bb55dd69d38b27e7" -dependencies = [ - "num-traits", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "lazy_static" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" - -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - -[[package]] -name = "libsqlite3-sys" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "matrixmultiply" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - -[[package]] -name = "multimap" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" -dependencies = [ - "serde", -] - -[[package]] -name = "nalgebra" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" -dependencies = [ - "approx", - "matrixmultiply", - "nalgebra-macros", - "num-complex", - "num-rational", - "num-traits", - "rand", - "rand_distr", - "simba", - "typenum", -] - -[[package]] -name = "nalgebra-macros" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - -[[package]] -name = "newtype_derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" -dependencies = [ - "rustc_version", -] - -[[package]] -name = "noodles" -version = "0.78.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81e0702ff73390d2b8f97751bc34cfedcd3f5d39b4e32875e4c64a32d8cc670b" -dependencies = [ - "noodles-bam", - "noodles-bcf", - "noodles-bgzf", - "noodles-cram", - "noodles-csi", - "noodles-fasta", - "noodles-fastq", - "noodles-gff", - "noodles-sam", - "noodles-tabix", - "noodles-vcf", -] - -[[package]] -name = "noodles-bam" -version = "0.65.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "406d4768f21c73e3075c0c0d77a5b21bc8b8169c8f0963122607cc410427b727" -dependencies = [ - "bit-vec 0.7.0", - "bstr", - "byteorder", - "bytes", - "futures", - "indexmap", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "noodles-sam", - "tokio", -] - -[[package]] -name = "noodles-bcf" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f217d811dde790866d3c4902ef713f9dc5c4b96e51935571ddf68399f97ad6" -dependencies = [ - "byteorder", - "futures", - "indexmap", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "noodles-vcf", - "tokio", -] - -[[package]] -name = "noodles-bgzf" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2fba0f4a64cc897d9396d730a0c444d148daed7de31ad5904ecc673178fc9d" -dependencies = [ - "byteorder", - "bytes", - "crossbeam-channel", - "flate2", - "futures", - "pin-project-lite", - "tokio", - "tokio-util", -] - -[[package]] -name = "noodles-core" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5a8c6b020d1205abef2b0fab4463a6c5ecc3c8f4d561ca8b0d1a42323376200" -dependencies = [ - "bstr", -] - -[[package]] -name = "noodles-cram" -version = "0.66.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7f58d900eb9fa1f0ef1a4834a6f71bfa792b25b2fa3b02e582239089907909" -dependencies = [ - "async-compression", - "bitflags", - "bstr", - "byteorder", - "bytes", - "bzip2", - "flate2", - "futures", - "indexmap", - "md-5", - "noodles-bam", - "noodles-core", - "noodles-fasta", - "noodles-sam", - "pin-project-lite", - "tokio", - "xz2", -] - -[[package]] -name = "noodles-csi" -version = "0.37.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4bc8001c54f1d8e47e1ac6041a5f27edc99b68bacea3fade9c89059de285aea" -dependencies = [ - "bit-vec 0.7.0", - "byteorder", - "indexmap", - "noodles-bgzf", - "noodles-core", - "tokio", -] - -[[package]] -name = "noodles-fasta" -version = "0.41.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a1662ac3ace299515c982a322e378bbeb4c1bd90fb098d823ef0f3a6abcc00" -dependencies = [ - "bstr", - "bytes", - "memchr", - "noodles-bgzf", - "noodles-core", - "tokio", -] - -[[package]] -name = "noodles-fastq" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1edf1f924acddeee36304c444e242b9bda52ef9383dc2d7f008fca190753207" -dependencies = [ - "futures", - "memchr", - "tokio", -] - -[[package]] -name = "noodles-gff" -version = "0.35.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adef59012090b5694b58cad0e4426cd18af404803f942d02e664af607d89ee28" -dependencies = [ - "futures", - "indexmap", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "percent-encoding", - "tokio", -] - -[[package]] -name = "noodles-sam" -version = "0.62.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b80efc627501962e2ff15411d1c011fa9cf3db1b47ddd13dceb1d1134068d5b7" -dependencies = [ - "bitflags", - "bstr", - "futures", - "indexmap", - "lexical-core", - "memchr", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "tokio", -] - -[[package]] -name = "noodles-tabix" -version = "0.43.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "545e16e229b7f8734b0a2a36bd4c98a5b70128663b16b5201ddadc0d09c28d4a" -dependencies = [ - "bit-vec 0.7.0", - "byteorder", - "indexmap", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "tokio", -] - -[[package]] -name = "noodles-vcf" -version = "0.61.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6372f1df57c1826d083370b6eac586f331509feed15fd80dda306ef3e7ac68d" -dependencies = [ - "futures", - "indexmap", - "memchr", - "noodles-bgzf", - "noodles-core", - "noodles-csi", - "noodles-tabix", - "percent-encoding", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "object" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "ordered-float" -version = "4.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be" -dependencies = [ - "num-traits", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "regex" -version = "1.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" - -[[package]] -name = "rusqlite" -version = "0.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" -dependencies = [ - "bitflags", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "smallvec", -] - -[[package]] -name = "rusqlite_migration" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55709bc01054c69e2f1cefdc886642b5e6376a8db3c86f761be0c423eebf178b" -dependencies = [ - "include_dir", - "log", - "rusqlite", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - -[[package]] -name = "rustc_version" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" -dependencies = [ - "semver", -] - -[[package]] -name = "rustversion" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" - -[[package]] -name = "safe_arch" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "semver" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" - -[[package]] -name = "serde" -version = "1.0.204" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.204" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.71", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "simba" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" -dependencies = [ - "approx", - "num-complex", - "num-traits", - "paste", - "wide", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "statrs" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35a062dbadac17a42e0fc64c27f419b25d6fae98572eb43c8814c9e873d7721" -dependencies = [ - "approx", - "lazy_static", - "nalgebra", - "num-traits", - "rand", -] - -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "strum" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.71", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "thiserror" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.71", -] - -[[package]] -name = "tokio" -version = "1.38.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb2caba9f80616f438e09748d5acda951967e1ea58508ef53d9c6402485a46df" -dependencies = [ - "backtrace", - "bytes", - "pin-project-lite", -] - -[[package]] -name = "tokio-util" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "triple_accel" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" -dependencies = [ - "serde", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wide" -version = "0.7.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2caba658a80831539b30698ae9862a72db6697dfdd7151e46920f5f2755c3ce2" -dependencies = [ - "bytemuck", - "safe_arch", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.71", -] diff --git a/Cargo.toml b/Cargo.toml index faf61a5..31ce0d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ rusqlite = { version = "0.31.0", features = ["bundled", "array"] } rusqlite_migration = { version = "1.2.0" , features = ["from-directory"]} sha2 = "0.10.8" noodles = { version = "0.78.0", features = ["vcf", "fasta", "async"] } +petgraph = "0.6.5" diff --git a/migrations/01-initial/up.sql b/migrations/01-initial/up.sql index d507ca8..567bf21 100644 --- a/migrations/01-initial/up.sql +++ b/migrations/01-initial/up.sql @@ -13,12 +13,6 @@ CREATE TABLE sequence ( "length" INTEGER NOT NULL ); -CREATE TABLE path ( - id INTEGER PRIMARY KEY NOT NULL, - name TEXT NOT NULL, - path_index INTEGER NOT NULL DEFAULT 0 -); - CREATE TABLE block_group ( id INTEGER PRIMARY KEY NOT NULL, collection_name TEXT NOT NULL, @@ -44,7 +38,7 @@ CREATE UNIQUE INDEX block_uidx ON block(sequence_hash, block_group_id, start, en CREATE TABLE edges ( id INTEGER PRIMARY KEY NOT NULL, - source_id INTEGER NOT NULL, + source_id INTEGER, target_id INTEGER, chromosome_index INTEGER NOT NULL, phased INTEGER NOT NULL, @@ -52,5 +46,23 @@ CREATE TABLE edges ( FOREIGN KEY(target_id) REFERENCES block(id), constraint chk_phased check (phased in (0, 1)) ); - CREATE UNIQUE INDEX edge_uidx ON edges(source_id, target_id, chromosome_index, phased); + +CREATE TABLE path ( + id INTEGER PRIMARY KEY NOT NULL, + block_group_id INTEGER NOT NULL, + name TEXT NOT NULL, + FOREIGN KEY(block_group_id) REFERENCES block_group(id) +); +CREATE UNIQUE INDEX path_uidx ON path(block_group_id, name); + +CREATE TABLE path_edges ( + id INTEGER PRIMARY KEY NOT NULL, + path_id INTEGER NOT NULL, + source_edge_id INTEGER, + target_edge_id INTEGER, + FOREIGN KEY(source_edge_id) REFERENCES edges(id), + FOREIGN KEY(target_edge_id) REFERENCES edges(id), + FOREIGN KEY(path_id) REFERENCES path(id) +); +CREATE UNIQUE INDEX path_edge_uidx ON path_edges(path_id, source_edge_id, target_edge_id); \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 6bdcbd4..74da838 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,14 +7,14 @@ use std::path::PathBuf; use bio::io::fasta; use gen::get_connection; use gen::migrations::run_migrations; -use gen::models::{self, block::Block, edge::Edge, sequence::Sequence, BlockGroup}; +use gen::models::{self, block::Block, edge::Edge, path::Path, sequence::Sequence, BlockGroup}; use noodles::vcf; use noodles::vcf::variant::record::samples::series::value::genotype::Phasing; use noodles::vcf::variant::record::samples::series::Value; use noodles::vcf::variant::record::samples::{Sample, Series}; use noodles::vcf::variant::record::{AlternateBases, ReferenceBases, Samples}; use noodles::vcf::variant::Record; -use rusqlite::Connection; +use rusqlite::{types::Value as SQLValue, Connection}; use std::io; #[derive(Parser)] @@ -80,7 +80,14 @@ fn import_fasta(fasta: &String, name: &String, shallow: bool, conn: &mut Connect (sequence.len() as i32), &"1".to_string(), ); - let edge = Edge::create(conn, block.id, None, 0, 0); + let edge_1 = Edge::create(conn, None, Some(block.id), 0, 0); + let edge_2 = Edge::create(conn, Some(block.id), None, 0, 0); + Path::create( + conn, + record.id(), + block_group.id, + vec![edge_1.id, edge_2.id], + ); } println!("Created it"); } else { @@ -108,10 +115,8 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne let ref_end = record.variant_end(&header).unwrap().get(); let alt_bases = record.alternate_bases(); let alt_alleles: Vec<_> = alt_bases.iter().collect::>().unwrap(); - let mut created: HashSet = HashSet::new(); for (sample_index, sample) in record.samples().iter().enumerate() { let genotype = sample.get(&header, "GT"); - let mut allele_blocks: HashMap = HashMap::new(); if genotype.is_some() { if let Value::Genotype(genotypes) = genotype.unwrap().unwrap().unwrap() { for (chromosome_index, gt) in genotypes.iter().enumerate() { @@ -137,6 +142,14 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne &sample_names[sample_index], &seq_name, ); + let sample_path_id = Path::get_paths( + conn, + "select * from path where block_group_id = ?1 AND name = ?2", + vec![ + SQLValue::from(sample_bg_id), + SQLValue::from(seq_name.clone()), + ], + ); let new_block_id = Block::create( conn, &new_sequence_hash, @@ -145,10 +158,9 @@ fn update_with_vcf(vcf_path: &String, collection_name: &String, conn: &mut Conne alt_seq.len() as i32, &"1".to_string(), ); - println!("{sample_bg_id} {new_block_id:?} {chromosome_index} {phased} {allele}"); BlockGroup::insert_change( conn, - sample_bg_id, + sample_path_id[0].id, ref_start as i32, ref_end as i32, new_block_id.id, @@ -227,8 +239,16 @@ mod tests { ); update_with_vcf(&vcf_path.to_str().unwrap().to_string(), &collection, conn); assert_eq!( - BlockGroup::sequence(conn, &collection, Some(&"foo".to_string()), "m123"), - "ATCATCGATCGATCGATCGGGAACACACAGAGA" + BlockGroup::get_all_sequences(conn, 1), + HashSet::from_iter(vec!["ATCGATCGATCGATCGATCGGGAACACACAGAGA".to_string()]) + ); + assert_eq!( + BlockGroup::get_all_sequences(conn, 2), + HashSet::from_iter(vec!["ATCATCGATAGAGATCGATCGGGAACACACAGAGA".to_string()]) + ); + assert_eq!( + BlockGroup::get_all_sequences(conn, 3), + HashSet::from_iter(vec!["ATCATCGATCGATCGATCGGGAACACACAGAGA".to_string()]) ); } } diff --git a/src/models.rs b/src/models.rs index 91f1821..1ba1f8f 100644 --- a/src/models.rs +++ b/src/models.rs @@ -1,15 +1,24 @@ -use std::collections::hash_map::Entry::Vacant; -use std::collections::HashMap; -use std::fmt::*; - +use noodles::vcf::variant::record::info::field::value::array::Values; +use petgraph::data::Build; +use petgraph::graphmap::DiGraphMap; +use petgraph::visit::{Dfs, IntoNeighborsDirected, NodeCount}; +use petgraph::Direction; +use rusqlite::types::Value; use rusqlite::{params_from_iter, Connection}; +use std::collections::{HashMap, HashSet}; +use std::fmt::*; +use std::hash::Hash; pub mod block; pub mod edge; +pub mod path; pub mod sequence; + use crate::models; use crate::models::block::Block; use crate::models::edge::Edge; +use crate::models::path::{all_simple_paths, Path, PathEdge}; +use crate::models::sequence::Sequence; #[derive(Debug)] pub struct Collection { @@ -66,470 +75,6 @@ impl Sample { } } -#[derive(Debug)] -pub struct Path { - pub id: i32, - pub collection_name: String, - pub sample_name: Option, - pub name: String, - pub path_index: i32, -} - -impl Path { - // pub fn create( - // conn: &mut Connection, - // collection_name: &String, - // sample_name: Option<&String>, - // path_name: &String, - // path_index: Option, - // ) -> Path { - // let query = "INSERT INTO path (collection_name, sample_name, name, path_index) VALUES (?1, ?2, ?3, ?4) RETURNING *"; - // let mut stmt = conn.prepare(query).unwrap(); - // let index = path_index.unwrap_or(0); - // match stmt.query_row((collection_name, sample_name, path_name, index), |row| { - // Ok(Path { - // id: row.get(0)?, - // collection_name: row.get(1)?, - // sample_name: row.get(2)?, - // name: row.get(3)?, - // path_index: row.get(4)?, - // }) - // }) { - // Ok(path) => path, - // Err(rusqlite::Error::SqliteFailure(err, details)) => { - // if err.code == rusqlite::ErrorCode::ConstraintViolation { - // println!("{err:?} {details:?}"); - // Path { - // id: conn - // .query_row( - // "select id from path where collection_name = ?1 and sample_name is null and name = ?2 and path_index = ?3", - // (collection_name, path_name, index), - // |row| row.get(0), - // ) - // .unwrap(), - // collection_name: collection_name.clone(), - // sample_name: sample_name.map(|s| s.to_string()), - // name: path_name.clone(), - // path_index: index, - // } - // } else { - // panic!("something bad happened querying the database") - // } - // } - // Err(_) => { - // panic!("something bad happened querying the database") - // } - // } - // } - // - // pub fn clone(conn: &mut Connection, source_path_id: i32, target_path_id: i32) { - // let mut stmt = conn - // .prepare_cached( - // "SELECT id, sequence_hash, start, end, strand from block where path_id = ?1", - // ) - // .unwrap(); - // let mut block_map: HashMap = HashMap::new(); - // let mut it = stmt.query([source_path_id]).unwrap(); - // let mut row = it.next().unwrap(); - // while row.is_some() { - // let block = row.unwrap(); - // let block_id: i32 = block.get(0).unwrap(); - // let hash: String = block.get(1).unwrap(); - // let start = block.get(2).unwrap(); - // let end = block.get(3).unwrap(); - // let strand: String = block.get(4).unwrap(); - // let new_block = Block::create(conn, &hash, target_path_id, start, end, &strand); - // block_map.insert(block_id, new_block.id); - // row = it.next().unwrap(); - // } - // - // // todo: figure out rusqlite's rarray - // let mut stmt = conn - // .prepare_cached("SELECT source_id, target_id from edges where source_id IN (?1)") - // .unwrap(); - // let block_keys = block_map - // .keys() - // .map(|k| format!("{k}")) - // .collect::>() - // .join(", "); - // let mut it = stmt.query([block_keys]).unwrap(); - // let mut row = it.next().unwrap(); - // while row.is_some() { - // let edge = row.unwrap(); - // let source_id: i32 = edge.get(0).unwrap(); - // let target_id: Option = edge.get(1).unwrap(); - // Edge::create( - // conn, - // *block_map.get(&source_id).unwrap_or(&source_id), - // target_id, - // ); - // row = it.next().unwrap(); - // } - // } - // - // pub fn get_or_create_sample_path( - // conn: &mut Connection, - // collection_name: &String, - // sample_name: &String, - // path_name: &String, - // new_path_index: i32, - // ) -> i32 { - // let mut path_id : i32 = match conn.query_row( - // "select id from path where collection_name = ?1 AND sample_name = ?2 AND name = ?3 AND path_index = ?4", - // (collection_name, sample_name, path_name, new_path_index), - // |row| row.get(0), - // ) { - // Ok(res) => res, - // Err(rusqlite::Error::QueryReturnedNoRows) => 0, - // Err(_e) => { - // panic!("Error querying the database: {_e}"); - // } - // }; - // if path_id != 0 { - // return path_id; - // } else { - // // no path exists, so make it first -- check if we have a reference path for this sample first - // path_id = match conn.query_row( - // "select id from path where collection_name = ?1 AND sample_name = ?2 AND name = ?3 AND path_index = 0", - // (collection_name, sample_name, path_name), - // |row| row.get(0), - // ) { - // Ok(res) => res, - // Err(rusqlite::Error::QueryReturnedNoRows) => 0, - // Err(_e) => { - // panic!("something bad happened querying the database") - // } - // } - // } - // if path_id == 0 { - // // use the base reference bath if it exists since there is no base sample path - // path_id = match conn.query_row( - // "select path.id from path where collection_name = ?1 AND sample_name IS null AND name = ?2 AND path_index = 0", - // (collection_name, path_name), - // |row| row.get(0), - // ) { - // Ok(res) => res, - // Err(rusqlite::Error::QueryReturnedNoRows) => panic!("No base path exists"), - // Err(_e) => { - // panic!("something bad happened querying the database") - // } - // } - // } - // let new_path_id = Path::create( - // conn, - // collection_name, - // Some(sample_name), - // path_name, - // Some(new_path_index), - // ); - // - // // clone parent blocks/edges - // Path::clone(conn, path_id, new_path_id.id); - // - // new_path_id.id - // } - // - // #[allow(clippy::ptr_arg)] - // #[allow(clippy::too_many_arguments)] - // pub fn insert_change( - // conn: &mut Connection, - // path_id: i32, - // start: i32, - // end: i32, - // new_block_id: i32, - // ) { - // println!("change is {path_id} {start} {end} {new_block_id}"); - // // todo: - // // 1. get blocks where start-> end overlap - // // 2. split old blocks at boundry points, make new block for left/right side - // // 3. make new block for sequence we are changing - // // 4. update edges - // // add support for deletion - // // cases to check: - // // change that is the size of a block - // // change that goes over multiple blocks - // // change that hits just start/end boundry, e.g. block is 1,5 and change is 3,5 or 1,3. - // // change that deletes block boundry - // // https://stackoverflow.com/questions/3269434/whats-the-most-efficient-way-to-test-if-two-ranges-overlap - // let mut stmt = conn.prepare_cached("select b.id, b.sequence_hash, b.path_id, b.start, b.end, b.strand, e.id as edge_id, e.source_id, e.target_id from block b left join edges e on (e.source_id = b.id or e.target_id = b.id) where b.path_id = ?1 AND b.start <= ?3 AND ?2 <= b.end AND b.id != ?4;").unwrap(); - // let mut block_edges: HashMap> = HashMap::new(); - // let mut blocks: HashMap = HashMap::new(); - // let mut it = stmt.query([path_id, start, end, new_block_id]).unwrap(); - // let mut row = it.next().unwrap(); - // while row.is_some() { - // let entry = row.unwrap(); - // let block_id = entry.get(0).unwrap(); - // let edge_id: Option = entry.get(6).unwrap(); - // blocks.insert( - // block_id, - // Block { - // id: block_id, - // sequence_hash: entry.get(1).unwrap(), - // path_id: entry.get(2).unwrap(), - // start: entry.get(3).unwrap(), - // end: entry.get(4).unwrap(), - // strand: entry.get(5).unwrap(), - // }, - // ); - // if edge_id.is_some() { - // if let Vacant(e) = block_edges.entry(block_id) { - // e.insert(vec![Edge { - // id: edge_id.unwrap(), - // source_id: entry.get(7).unwrap(), - // target_id: entry.get(8).unwrap(), - // }]); - // } else { - // block_edges.get_mut(&block_id).unwrap().push(Edge { - // id: entry.get(6).unwrap(), - // source_id: entry.get(7).unwrap(), - // target_id: entry.get(8).unwrap(), - // }); - // } - // } else { - // println!("empty eid {row:?}"); - // } - // row = it.next().unwrap(); - // } - // - // #[derive(Debug)] - // struct ReplacementEdge { - // id: i32, - // new_source_id: Option, - // new_target_id: Option, - // } - // let mut replacement_edges: Vec = vec![]; - // let mut new_edges: Vec<(i32, i32)> = vec![]; - // - // for (block_id, block) in &blocks { - // let contains_start = block.start <= start && start < block.end; - // let contains_end = block.start <= end && end < block.end; - // - // if contains_start && contains_end { - // // our range is fully contained w/in the block - // // |----block------| - // // |----range---| - // let left_block = Block::create( - // conn, - // &block.sequence_hash, - // path_id, - // block.start, - // start, - // &block.strand, - // ); - // let right_block = Block::create( - // conn, - // &block.sequence_hash, - // path_id, - // end, - // block.end, - // &block.strand, - // ); - // println!("lb {left_block:?} {right_block:?}"); - // new_edges.push((left_block.id, new_block_id)); - // new_edges.push((new_block_id, right_block.id)); - // // what stuff went to this block? - // for edges in block_edges.get(block_id) { - // for edge in edges { - // println!("block {block_id} on edge {edge:?}"); - // let mut new_source_id = None; - // let mut new_target_id = None; - // if edge.source_id == *block_id { - // new_source_id = Some(right_block.id); - // } - // if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - // new_target_id = Some(left_block.id); - // } - // replacement_edges.push(ReplacementEdge { - // id: edge.id, - // new_source_id, - // new_target_id, - // }); - // println!("new res {replacement_edges:?}"); - // } - // } - // } else if contains_start { - // // our range is overlapping the end of the block - // // |----block---| - // // |----range---| - // let left_block = Block::create( - // conn, - // &block.sequence_hash, - // path_id, - // block.start, - // start, - // &block.strand, - // ); - // new_edges.push((left_block.id, new_block_id)); - // // what stuff went to this block? - // for edges in block_edges.get(block_id) { - // for edge in edges { - // let mut new_source_id = None; - // let mut new_target_id = None; - // if edge.source_id == *block_id { - // new_source_id = Some(new_block_id); - // } - // if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - // new_target_id = Some(left_block.id); - // } - // replacement_edges.push(ReplacementEdge { - // id: edge.id, - // new_source_id, - // new_target_id, - // }); - // } - // } - // } else if contains_end { - // // our range is overlapping the beginning of the block - // // |----block---| - // // |----range---| - // let right_block = Block::create( - // conn, - // &block.sequence_hash, - // path_id, - // end, - // block.end, - // &block.strand, - // ); - // // what stuff went to this block? - // new_edges.push((new_block_id, right_block.id)); - // for edges in block_edges.get(block_id) { - // for edge in edges { - // let mut new_source_id = None; - // let mut new_target_id = None; - // if edge.source_id == *block_id { - // new_source_id = Some(right_block.id); - // } - // if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - // new_target_id = Some(new_block_id); - // } - // replacement_edges.push(ReplacementEdge { - // id: edge.id, - // new_source_id, - // new_target_id, - // }) - // } - // } - // } else { - // // our range is the whole block, get rid of it - // // |--block---| - // // |-----range------| - // // what stuff went to this block? - // for edges in block_edges.get(block_id) { - // for edge in edges { - // let mut new_source_id = None; - // let mut new_target_id = None; - // if edge.source_id == *block_id { - // new_source_id = Some(new_block_id); - // } - // if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - // new_target_id = Some(new_block_id); - // } - // replacement_edges.push(ReplacementEdge { - // id: edge.id, - // new_source_id, - // new_target_id, - // }) - // } - // } - // } - // } - // - // for replacement_edge in replacement_edges { - // let mut exist_query; - // let mut update_query; - // let mut placeholders: Vec = vec![]; - // if replacement_edge.new_source_id.is_some() && replacement_edge.new_target_id.is_some() - // { - // exist_query = "select id from edges where source_id = ?1 and target_id = ?2;"; - // update_query = "update edges set source_id = ?1 AND target_id = ?2 where id = ?3"; - // placeholders.push(replacement_edge.new_source_id.unwrap()); - // placeholders.push(replacement_edge.new_target_id.unwrap()); - // } else if replacement_edge.new_source_id.is_some() { - // exist_query = "select id from edges where source_id = ?1 and target_id is null;"; - // update_query = "update edges set source_id = ?1 where id = ?2"; - // placeholders.push(replacement_edge.new_source_id.unwrap()); - // } else if replacement_edge.new_target_id.is_some() { - // exist_query = "select id from edges where source_id is null and target_id = ?1;"; - // update_query = "update edges set target_id = ?1 where id = ?2"; - // placeholders.push(replacement_edge.new_target_id.unwrap()); - // } else { - // continue; - // } - // println!("{exist_query:?} {update_query} {placeholders:?}"); - // - // let mut stmt = conn.prepare_cached(exist_query).unwrap(); - // if !stmt.exists(params_from_iter(&placeholders)).unwrap() { - // placeholders.push(replacement_edge.id); - // println!("updating {exist_query:?} {update_query} {placeholders:?}"); - // let mut stmt = conn.prepare_cached(update_query).unwrap(); - // stmt.execute(params_from_iter(&placeholders)).unwrap(); - // } else { - // println!("edge exists"); - // } - // } - // for new_edge in new_edges { - // Edge::create(conn, new_edge.0, Some(new_edge.1)); - // } - // - // let block_keys = blocks - // .keys() - // .map(|k| format!("{k}")) - // .collect::>() - // .join(", "); - // let mut stmt = conn - // .prepare_cached("DELETE from block where id IN (?1)") - // .unwrap(); - // stmt.execute([block_keys]).unwrap(); - // } - // - // pub fn sequence( - // conn: &mut Connection, - // collection_name: &str, - // sample_name: Option<&String>, - // path_name: &str, - // path_index: i32, - // ) -> String { - // struct SequenceBlock { - // sequence: String, - // strand: String, - // } - // let mut query; - // let mut placeholders: Vec = - // vec![collection_name.to_string().into()]; - // - // if sample_name.is_some() { - // query = "WITH RECURSIVE traverse(block_id, block_sequence, block_start, block_end, block_strand, depth) AS ( - // SELECT edges.source_id, substr(seq.sequence, block.start + 1, block.end - block.start), block.start, block.end, block.strand, 0 as depth FROM path left join block on (path.id = block.path_id) left join sequence seq on (seq.hash = block.sequence_hash) left join edges on (block.id = edges.source_id or block.id = edges.target_id) WHERE path.collection_name = ?1 AND path.sample_name = ?2 AND path.name = ?3 AND path.path_index = ?4 and edges.target_id is null - // UNION - // SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id - // ) SELECT block_sequence as sequence, block_strand as strand FROM traverse order by depth desc;"; - // placeholders.push(sample_name.unwrap().clone().into()); - // } else { - // query = "WITH RECURSIVE traverse(block_id, block_sequence, block_start, block_end, block_strand, depth) AS ( - // SELECT edges.source_id, substr(seq.sequence, block.start + 1, block.end - block.start), block.start, block.end, block.strand, 0 as depth FROM path left join block on (path.id = block.path_id) left join sequence seq on (seq.hash = block.sequence_hash) left join edges on (block.id = edges.source_id or block.id = edges.target_id) WHERE path.collection_name = ?1 AND path.sample_name is null AND path.name = ?2 AND path.path_index = ?3 and edges.target_id is null - // UNION - // SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id - // ) SELECT block_sequence as sequence, block_strand as strand FROM traverse order by depth desc;" - // } - // placeholders.push(path_name.to_string().into()); - // placeholders.push(path_index.into()); - // let mut stmt = conn.prepare(query).unwrap(); - // let mut blocks = stmt - // .query_map(params_from_iter(placeholders), |row| { - // Ok(SequenceBlock { - // sequence: row.get(0)?, - // strand: row.get(1)?, - // }) - // }) - // .unwrap(); - // let mut sequence = "".to_string(); - // for block in blocks { - // sequence.push_str(&block.unwrap().sequence); - // } - // sequence - // } -} - #[derive(Debug)] pub struct BlockGroup { pub id: i32, @@ -581,14 +126,14 @@ impl BlockGroup { } } - pub fn clone(conn: &mut Connection, source_id: i32, target_id: i32) { + pub fn clone(conn: &mut Connection, source_block_group_id: i32, target_block_group_id: i32) { let mut stmt = conn .prepare_cached( "SELECT id, sequence_hash, start, end, strand from block where block_group_id = ?1", ) .unwrap(); let mut block_map: HashMap = HashMap::new(); - let mut it = stmt.query([source_id]).unwrap(); + let mut it = stmt.query([source_block_group_id]).unwrap(); let mut row = it.next().unwrap(); while row.is_some() { let block = row.unwrap(); @@ -597,14 +142,14 @@ impl BlockGroup { let start = block.get(2).unwrap(); let end = block.get(3).unwrap(); let strand: String = block.get(4).unwrap(); - let new_block = Block::create(conn, &hash, target_id, start, end, &strand); + let new_block = Block::create(conn, &hash, target_block_group_id, start, end, &strand); block_map.insert(block_id, new_block.id); row = it.next().unwrap(); } // todo: figure out rusqlite's rarray let mut stmt = conn - .prepare_cached("SELECT source_id, target_id from edges where source_id IN (?1)") + .prepare_cached("SELECT id, source_id, target_id, chromosome_index, phased from edges where source_id IN (?1) OR target_id in (?1)") .unwrap(); let block_keys = block_map .keys() @@ -613,19 +158,67 @@ impl BlockGroup { .join(", "); let mut it = stmt.query([block_keys]).unwrap(); let mut row = it.next().unwrap(); + let mut edge_map = HashMap::new(); while row.is_some() { let edge = row.unwrap(); - let source_id: i32 = edge.get(0).unwrap(); - let target_id: Option = edge.get(1).unwrap(); - Edge::create( - conn, - *block_map.get(&source_id).unwrap_or(&source_id), - target_id, - 0, - 0, - ); + let edge_id: i32 = edge.get(0).unwrap(); + let source_id: Option = edge.get(1).unwrap(); + let target_id: Option = edge.get(2).unwrap(); + let chrom_index = edge.get(3).unwrap(); + let phased = edge.get(4).unwrap(); + let mut new_edge; + if target_id.is_some() && source_id.is_some() { + let target_id = target_id.unwrap(); + let source_id = source_id.unwrap(); + new_edge = Edge::create( + conn, + Some(*block_map.get(&source_id).unwrap_or(&source_id)), + Some(*block_map.get(&target_id).unwrap_or(&target_id)), + chrom_index, + phased, + ); + } else if target_id.is_some() { + let target_id = target_id.unwrap(); + new_edge = Edge::create( + conn, + None, + Some(*block_map.get(&target_id).unwrap_or(&target_id)), + chrom_index, + phased, + ); + } else if source_id.is_some() { + let source_id = source_id.unwrap(); + new_edge = Edge::create( + conn, + Some(*block_map.get(&source_id).unwrap_or(&source_id)), + None, + 0, + 0, + ); + } else { + panic!("no source and target specified."); + } + edge_map.insert(edge_id, new_edge.id); + row = it.next().unwrap(); } + println!("new edges {edge_map:?}"); + + let existing_paths = Path::get_paths( + conn, + "SELECT * from path where block_group_id = ?1", + vec![Value::from(source_block_group_id)], + ); + println!("eps {existing_paths:?}"); + + for path in existing_paths { + let mut new_edges = vec![]; + for edge in path.edges { + new_edges.push(*edge_map.get(&edge).unwrap()); + } + let new_p = Path::create(conn, &path.name, target_block_group_id, new_edges); + println!("made {new_p:?}"); + } } pub fn get_or_create_sample_block_group( @@ -663,47 +256,120 @@ impl BlockGroup { } let new_bg_id = BlockGroup::create(conn, collection_name, Some(sample_name), group_name); - // clone parent blocks/edges + // clone parent blocks/edges/path BlockGroup::clone(conn, bg_id, new_bg_id.id); new_bg_id.id } + pub fn get_all_sequences(conn: &Connection, block_group_id: i32) -> HashSet { + let mut block_map = HashMap::new(); + for block in Block::get_blocks( + conn, + "select * from block where block_group_id = ?1", + vec![Value::from(block_group_id)], + ) { + block_map.insert(block.id, block); + } + let sequence_hashes = block_map + .values() + .map(|block| format!("\"{id}\"", id = block.sequence_hash)) + .collect::>() + .join(","); + let mut sequence_map = HashMap::new(); + for sequence in Sequence::get_sequences( + conn, + &format!("select * from sequence where hash in ({sequence_hashes})"), + vec![], + ) { + sequence_map.insert(sequence.hash, sequence.sequence); + } + let block_ids = block_map + .keys() + .map(|id| format!("{id}")) + .collect::>() + .join(","); + let edges = Edge::get_edges(conn, &format!("select * from edges where source_id in ({block_ids}) OR target_id in ({block_ids})"), vec![]); + let mut graph: DiGraphMap = DiGraphMap::new(); + for block_id in block_map.keys() { + graph.add_node(*block_id); + } + for edge in edges { + if edge.source_id.is_some() && edge.target_id.is_some() { + graph.add_edge(edge.source_id.unwrap(), edge.target_id.unwrap(), ()); + } + } + let mut start_nodes = vec![]; + let mut end_nodes = vec![]; + for node in graph.nodes() { + let has_incoming = graph.neighbors_directed(node, Direction::Incoming).next(); + let has_outgoing = graph.neighbors_directed(node, Direction::Outgoing).next(); + if has_incoming.is_none() { + start_nodes.push(node); + } + if has_outgoing.is_none() { + end_nodes.push(node); + } + } + let mut sequences = HashSet::new(); + for start_node in start_nodes { + for end_node in &end_nodes { + for path in all_simple_paths(&graph, start_node, *end_node) { + let mut current_sequence = "".to_string(); + for node in path { + let block = block_map.get(&node).unwrap(); + let block_sequence = sequence_map.get(&block.sequence_hash).unwrap(); + current_sequence.push_str( + &block_sequence[(block.start as usize)..(block.end as usize)], + ); + } + sequences.insert(current_sequence); + } + } + } + sequences + } + #[allow(clippy::ptr_arg)] #[allow(clippy::too_many_arguments)] pub fn insert_change( conn: &mut Connection, - block_group_id: i32, + path_id: i32, start: i32, end: i32, new_block_id: i32, chromosome_index: i32, phased: i32, ) { - println!("change is {block_group_id} {start} {end} {new_block_id}"); + println!("change is {path_id} {start} {end} {new_block_id}"); // todo: // 1. get blocks where start-> end overlap - // 2. split old blocks at boundry points, make new block for left/right side + // 2. split old blocks at boundary points, make new block for left/right side // 3. make new block for sequence we are changing // 4. update edges // add support for deletion // cases to check: // change that is the size of a block // change that goes over multiple blocks - // change that hits just start/end boundry, e.g. block is 1,5 and change is 3,5 or 1,3. - // change that deletes block boundry + // change that hits just start/end boundary, e.g. block is 1,5 and change is 3,5 or 1,3. + // change that deletes block boundary // https://stackoverflow.com/questions/3269434/whats-the-most-efficient-way-to-test-if-two-ranges-overlap - let mut stmt = conn.prepare_cached("select b.id, b.sequence_hash, b.block_group_id, b.start, b.end, b.strand, e.id as edge_id, e.source_id, e.target_id, e.chromosome_index, e.phased from block b left join edges e on (e.source_id = b.id or e.target_id = b.id) where b.block_group_id = ?1 AND b.start <= ?3 AND ?2 <= b.end AND b.id != ?4 AND e.chromosome_index = 0;").unwrap(); - let mut block_edges: HashMap> = HashMap::new(); + + // check if we've already inserted this for edges connected + // that means we have an edge with the chromosome index, that connects our start/end coordinates with the new block id + + let path = Path::get(conn, path_id); + let block_group_id = path.block_group_id; + let graph = PathEdge::edges_to_graph(conn, path.id); + println!("{path:?} {graph:?}"); + let query = format!("SELECT id, sequence_hash, block_group_id, start, end, strand from block where id in ({block_ids})", block_ids = graph.nodes().map(|k| format!("{k}")).collect::>().join(",")); + let mut stmt = conn.prepare(&query).unwrap(); let mut blocks: HashMap = HashMap::new(); - let mut it = stmt - .query([block_group_id, start, end, new_block_id]) - .unwrap(); + let mut it = stmt.query([]).unwrap(); let mut row = it.next().unwrap(); while row.is_some() { let entry = row.unwrap(); let block_id = entry.get(0).unwrap(); - let edge_id: Option = entry.get(6).unwrap(); blocks.insert( block_id, Block { @@ -715,219 +381,134 @@ impl BlockGroup { strand: entry.get(5).unwrap(), }, ); - if edge_id.is_some() { - if let Vacant(e) = block_edges.entry(block_id) { - e.insert(vec![Edge { - id: edge_id.unwrap(), - source_id: entry.get(7).unwrap(), - target_id: entry.get(8).unwrap(), - chromosome_index: entry.get(9).unwrap(), - phased: entry.get(10).unwrap(), - }]); - } else { - block_edges.get_mut(&block_id).unwrap().push(Edge { - id: entry.get(6).unwrap(), - source_id: entry.get(7).unwrap(), - target_id: entry.get(8).unwrap(), - chromosome_index: entry.get(9).unwrap(), - phased: entry.get(10).unwrap(), - }); - } - } else { - println!("empty eid {row:?}"); - } row = it.next().unwrap(); } - - #[derive(Debug)] - struct ReplacementEdge { - id: i32, - new_source_id: Option, - new_target_id: Option, + // TODO: probably don't need the graph, just get vector of source_ids. + let mut start_node = -1; + let start_edge = Edge::get(conn, path.edges[0]); + if let Some(value) = start_edge.source_id { + start_node = value + } else if let Some(value) = start_edge.target_id { + start_node = value } - let mut replacement_edges: Vec = vec![]; - let mut new_edges: Vec<(i32, i32)> = vec![]; + let mut dfs = Dfs::new(&graph, start_node as u32); + let mut path_start = 0; + let mut path_end = 0; + let mut new_edges = vec![]; + let mut previous_block: Option<&Block> = None; + let mut next_node = dfs.next(&graph); + println!("{blocks:?}"); + while next_node.is_some() { + let nx = next_node.unwrap(); + println!("nx is {nx}"); + let block = blocks.get(&(nx as i32)).unwrap(); + let block_length = (block.end - block.start); + path_end += block_length; - for (block_id, block) in &blocks { - let contains_start = block.start <= start && start < block.end; - let contains_end = block.start <= end && end < block.end; + // do stuff here + println!("{nx} {block:?} {start} {end} {path_start} {path_end}"); + + let contains_start = path_start <= start && start < path_end; + let contains_end = path_start <= end && end < path_end; + let overlap = path_start <= end && start <= path_end; if contains_start && contains_end { // our range is fully contained w/in the block // |----block------| // |----range---| - let left_block = Block::create( - conn, - &block.sequence_hash, - block_group_id, - block.start, - start, - &block.strand, - ); - let right_block = Block::create( - conn, - &block.sequence_hash, - block_group_id, - end, - block.end, - &block.strand, - ); - println!("lb {left_block:?} {right_block:?}"); - new_edges.push((left_block.id, new_block_id)); - new_edges.push((new_block_id, right_block.id)); - // what stuff went to this block? - for edges in block_edges.get(block_id) { - for edge in edges { - println!("block {block_id} on edge {edge:?}"); - let mut new_source_id = None; - let mut new_target_id = None; - if edge.source_id == *block_id { - new_source_id = Some(right_block.id); - } - if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - new_target_id = Some(left_block.id); - } - replacement_edges.push(ReplacementEdge { - id: edge.id, - new_source_id, - new_target_id, - }); - println!("new res {replacement_edges:?}"); - } - } + let (left_block, right_block) = + Block::split(conn, block, start - path_start, chromosome_index, phased) + .unwrap(); + Block::delete(conn, block.id); + // let left_block = Block::create( + // conn, + // &block.sequence_hash, + // block_group_id, + // block.start, + // start - path_start, + // &block.strand, + // ); + // let right_block = Block::create( + // conn, + // &block.sequence_hash, + // block_group_id, + // block.start + (end - path_start), + // block.end, + // &block.strand, + // ); + // if let Some(value) = previous_block { + // new_edges.push((Some(value.id), Some(left_block.id))) + // } + new_edges.push((Some(left_block.id), Some(new_block_id))); + new_edges.push((Some(new_block_id), Some(right_block.id))); } else if contains_start { // our range is overlapping the end of the block // |----block---| // |----range---| - let left_block = Block::create( - conn, - &block.sequence_hash, - block_group_id, - block.start, - start, - &block.strand, - ); - new_edges.push((left_block.id, new_block_id)); - // what stuff went to this block? - for edges in block_edges.get(block_id) { - for edge in edges { - let mut new_source_id = None; - let mut new_target_id = None; - if edge.source_id == *block_id { - new_source_id = Some(new_block_id); - } - if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - new_target_id = Some(left_block.id); - } - replacement_edges.push(ReplacementEdge { - id: edge.id, - new_source_id, - new_target_id, - }); - } - } + let (left_block, right_block) = + Block::split(conn, block, start - path_start, chromosome_index, phased) + .unwrap(); + Block::delete(conn, block.id); + // let left_block = Block::create( + // conn, + // &block.sequence_hash, + // block_group_id, + // block.start, + // start - path_start, + // &block.strand, + // ); + // if let Some(value) = previous_block { + // new_edges.push((Some(value.id), Some(left_block.id))); + // } else { + // new_edges.push((None, Some(left_block.id))); + // } + new_edges.push((Some(left_block.id), Some(new_block_id))); } else if contains_end { // our range is overlapping the beginning of the block // |----block---| // |----range---| - let right_block = Block::create( - conn, - &block.sequence_hash, - block_group_id, - end, - block.end, - &block.strand, - ); - // what stuff went to this block? - new_edges.push((new_block_id, right_block.id)); - for edges in block_edges.get(block_id) { - for edge in edges { - let mut new_source_id = None; - let mut new_target_id = None; - if edge.source_id == *block_id { - new_source_id = Some(right_block.id); - } - if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - new_target_id = Some(new_block_id); - } - replacement_edges.push(ReplacementEdge { - id: edge.id, - new_source_id, - new_target_id, - }) - } - } - } else { - // our range is the whole block, get rid of it + let (left_block, right_block) = + Block::split(conn, block, end - path_start, chromosome_index, phased).unwrap(); + Block::delete(conn, block.id); + // let right_block = Block::create( + // conn, + // &block.sequence_hash, + // block_group_id, + // end - path_start, + // block.end, + // &block.strand, + // ); + // // what stuff went to this block? + new_edges.push((Some(new_block_id), Some(right_block.id))); + // let last_node = dfs.next(&graph); + // if last_node.is_some() { + // let next_block = blocks.get(&(last_node.unwrap() as i32)).unwrap(); + // new_edges.push((Some(right_block.id), Some(next_block.id))); + // } else { + // new_edges.push((Some(right_block.id), None)) + // } + break; + } else if overlap { + // our range is the whole block, ignore it // |--block---| // |-----range------| - // what stuff went to this block? - for edges in block_edges.get(block_id) { - for edge in edges { - let mut new_source_id = None; - let mut new_target_id = None; - if edge.source_id == *block_id { - new_source_id = Some(new_block_id); - } - if edge.target_id.is_some() && edge.target_id.unwrap() == *block_id { - new_target_id = Some(new_block_id); - } - replacement_edges.push(ReplacementEdge { - id: edge.id, - new_source_id, - new_target_id, - }) - } - } - } - } - - for replacement_edge in replacement_edges { - let mut exist_query; - let mut update_query; - let mut placeholders: Vec = vec![]; - if replacement_edge.new_source_id.is_some() && replacement_edge.new_target_id.is_some() - { - exist_query = "select id from edges where source_id = ?1 and target_id = ?2;"; - update_query = "update edges set source_id = ?1 AND target_id = ?2 where id = ?3"; - placeholders.push(replacement_edge.new_source_id.unwrap()); - placeholders.push(replacement_edge.new_target_id.unwrap()); - } else if replacement_edge.new_source_id.is_some() { - exist_query = "select id from edges where source_id = ?1 and target_id is null;"; - update_query = "update edges set source_id = ?1 where id = ?2"; - placeholders.push(replacement_edge.new_source_id.unwrap()); - } else if replacement_edge.new_target_id.is_some() { - exist_query = "select id from edges where source_id is null and target_id = ?1;"; - update_query = "update edges set target_id = ?1 where id = ?2"; - placeholders.push(replacement_edge.new_target_id.unwrap()); } else { - continue; + // not yet at the range } - println!("{exist_query:?} {update_query} {placeholders:?}"); - - let mut stmt = conn.prepare_cached(exist_query).unwrap(); - if !stmt.exists(params_from_iter(&placeholders)).unwrap() { - placeholders.push(replacement_edge.id); - println!("updating {exist_query:?} {update_query} {placeholders:?}"); - let mut stmt = conn.prepare_cached(update_query).unwrap(); - stmt.execute(params_from_iter(&placeholders)).unwrap(); - } else { - println!("edge exists"); + + path_start += block_length; + if path_start > end { + break; } + previous_block = Some(block); + next_node = dfs.next(&graph); } + + println!("change is {path:?} {graph:?} {blocks:?} {new_edges:?}"); + for new_edge in new_edges { - Edge::create(conn, new_edge.0, Some(new_edge.1), chromosome_index, phased); + Edge::create(conn, new_edge.0, new_edge.1, chromosome_index, phased); } - - let block_keys = blocks - .keys() - .map(|k| format!("{k}")) - .collect::>() - .join(", "); - let mut stmt = conn - .prepare_cached("DELETE from block where id IN (?1)") - .unwrap(); - stmt.execute([block_keys]).unwrap(); } // TODO: move this to path, doesn't belong in block group @@ -949,15 +530,15 @@ impl BlockGroup { query = "WITH RECURSIVE traverse(block_id, block_sequence, block_start, block_end, block_strand, depth) AS ( SELECT edges.source_id, substr(seq.sequence, block.start + 1, block.end - block.start), block.start, block.end, block.strand, 0 as depth FROM block_group left join block on (block_group.id = block.block_group_id) left join sequence seq on (seq.hash = block.sequence_hash) left join edges on (block.id = edges.source_id or block.id = edges.target_id) WHERE block_group.collection_name = ?1 AND block_group.sample_name = ?2 AND block_group.name = ?3 and edges.target_id is null UNION - SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id - ) SELECT block_sequence as sequence, block_strand as strand FROM traverse order by depth desc;"; + SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id order by depth desc + ) SELECT block_sequence as sequence, block_strand as strand FROM traverse;"; placeholders.push(sample_name.unwrap().clone().into()); } else { query = "WITH RECURSIVE traverse(block_id, block_sequence, block_start, block_end, block_strand, depth) AS ( SELECT edges.source_id, substr(seq.sequence, block.start + 1, block.end - block.start), block.start, block.end, block.strand, 0 as depth FROM block_group left join block on (block_group.id = block.block_group_id) left join sequence seq on (seq.hash = block.sequence_hash) left join edges on (block.id = edges.source_id or block.id = edges.target_id) WHERE block_group.collection_name = ?1 AND block_group.sample_name is null AND block_group.name = ?2 and edges.target_id is null UNION - SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id - ) SELECT block_sequence as sequence, block_strand as strand FROM traverse order by depth desc;" + SELECT e2.source_id, substr(seq2.sequence, b2.start + 1, b2.end - b2.start), b2.start, b2.end, b2.strand, depth + 1 FROM edges e2 left join block b2 on (b2.id = e2.source_id) left join sequence seq2 on (seq2.hash = b2.sequence_hash) JOIN traverse t2 ON e2.target_id = t2.block_id order by depth desc + ) SELECT block_sequence as sequence, block_strand as strand FROM traverse;" } placeholders.push(block_group_name.to_string().into()); let mut stmt = conn.prepare(query).unwrap(); @@ -976,3 +557,95 @@ impl BlockGroup { sequence } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::get_connection as get_db_connection; + use crate::migrations::run_migrations; + use std::fs; + use std::hash::Hash; + + fn get_connection() -> Connection { + let mut conn = Connection::open_in_memory() + .unwrap_or_else(|_| panic!("Error opening in memory test db")); + run_migrations(&mut conn); + conn + } + + fn setup_block_group(conn: &mut Connection) -> (i32, i32) { + let a_seq_hash = Sequence::create(conn, "DNA".to_string(), &"AAAAAAAAAA".to_string(), true); + let t_seq_hash = Sequence::create(conn, "DNA".to_string(), &"TTTTTTTTTT".to_string(), true); + let c_seq_hash = Sequence::create(conn, "DNA".to_string(), &"CCCCCCCCCC".to_string(), true); + let g_seq_hash = Sequence::create(conn, "DNA".to_string(), &"GGGGGGGGGG".to_string(), true); + let collection = Collection::create(conn, &"test".to_string()); + let block_group = BlockGroup::create(conn, &"test".to_string(), None, &"hg19".to_string()); + let a_block = Block::create(conn, &a_seq_hash, block_group.id, 0, 10, &"1".to_string()); + let t_block = Block::create(conn, &t_seq_hash, block_group.id, 0, 10, &"1".to_string()); + let c_block = Block::create(conn, &c_seq_hash, block_group.id, 0, 10, &"1".to_string()); + let g_block = Block::create(conn, &g_seq_hash, block_group.id, 0, 10, &"1".to_string()); + let edge_0 = Edge::create(conn, None, Some(a_block.id), 0, 0); + let edge_1 = Edge::create(conn, Some(a_block.id), Some(t_block.id), 0, 0); + let edge_2 = Edge::create(conn, Some(t_block.id), Some(c_block.id), 0, 0); + let edge_3 = Edge::create(conn, Some(c_block.id), Some(g_block.id), 0, 0); + let edge_4 = Edge::create(conn, Some(g_block.id), None, 0, 0); + let path = Path::create( + conn, + "chr1", + block_group.id, + vec![edge_0.id, edge_1.id, edge_2.id, edge_3.id, edge_4.id], + ); + (block_group.id, path.id) + } + + #[test] + fn simple_insert() { + fs::remove_file("test.db"); + let mut conn = get_db_connection("test.db"); + let (block_group_id, path_id) = setup_block_group(&mut conn); + let insert_sequence = + Sequence::create(&mut conn, "DNA".to_string(), &"NNNN".to_string(), true); + let insert = Block::create( + &conn, + &insert_sequence, + block_group_id, + 0, + 4, + &"1".to_string(), + ); + BlockGroup::insert_change(&mut conn, path_id, 7, 15, insert.id, 1, 0); + + let all_sequences = BlockGroup::get_all_sequences(&conn, block_group_id); + assert_eq!( + all_sequences, + HashSet::from_iter(vec![ + "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(), + "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string() + ]) + ); + + // TODO: should handle this w/ edges instead of a block, maybe this is ok though. + let deletion_sequence = + Sequence::create(&mut conn, "DNA".to_string(), &"".to_string(), true); + let deletion = Block::create( + &conn, + &deletion_sequence, + block_group_id, + 0, + 0, + &"1".to_string(), + ); + + // take out an entire block. + BlockGroup::insert_change(&mut conn, path_id, 19, 31, deletion.id, 1, 0); + let all_sequences = BlockGroup::get_all_sequences(&conn, block_group_id); + assert_eq!( + all_sequences, + HashSet::from_iter(vec![ + "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(), + "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(), + "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string() + ]) + ) + } +} diff --git a/src/models/block.rs b/src/models/block.rs index 0802f99..e140a53 100644 --- a/src/models/block.rs +++ b/src/models/block.rs @@ -1,4 +1,5 @@ -use rusqlite::Connection; +use crate::models::Path; +use rusqlite::{params_from_iter, types::Value, Connection}; use crate::models::edge::{Edge, UpdatedEdge}; @@ -61,6 +62,13 @@ impl Block { } } + pub fn delete(conn: &Connection, block_id: i32) { + let mut stmt = conn + .prepare_cached("DELETE from block where id = ?1") + .unwrap(); + stmt.execute((block_id,)).unwrap(); + } + pub fn edges_into(conn: &Connection, block_id: i32) -> Vec { let edge_query = "select id, source_id, target_id, chromosome_index, phased from edges where target_id = ?1;"; let mut stmt = conn.prepare_cached(edge_query).unwrap(); @@ -71,14 +79,14 @@ impl Block { while row.is_some() { let edge = row.unwrap(); let edge_id: i32 = edge.get(0).unwrap(); - let source_block_id: i32 = edge.get(1).unwrap(); - let target_block_id: i32 = edge.get(2).unwrap(); + let source_block_id: Option = edge.get(1).unwrap(); + let target_block_id: Option = edge.get(2).unwrap(); let chromosome_index: i32 = edge.get(3).unwrap(); let phased: i32 = edge.get(4).unwrap(); edges.push(Edge { id: edge_id, source_id: source_block_id, - target_id: Some(target_block_id), + target_id: target_block_id, chromosome_index, phased, }); @@ -98,14 +106,14 @@ impl Block { while row.is_some() { let edge = row.unwrap(); let edge_id: i32 = edge.get(0).unwrap(); - let source_block_id: i32 = edge.get(1).unwrap(); - let target_block_id: i32 = edge.get(2).unwrap(); + let source_block_id: Option = edge.get(1).unwrap(); + let target_block_id: Option = edge.get(2).unwrap(); let chromosome_index: i32 = edge.get(3).unwrap(); let phased: i32 = edge.get(4).unwrap(); edges.push(Edge { id: edge_id, source_id: source_block_id, - target_id: Some(target_block_id), + target_id: target_block_id, chromosome_index, phased, }); @@ -117,7 +125,7 @@ impl Block { pub fn split( conn: &Connection, - block: Block, + block: &Block, coordinate: i32, chromosome_index: i32, phased: i32, @@ -150,7 +158,7 @@ impl Block { for edge in edges_into.iter() { replacement_edges.push(UpdatedEdge { id: edge.id, - new_source_id: Some(edge.source_id), + new_source_id: edge.source_id, new_target_id: Some(new_left_block.id), }); } @@ -167,7 +175,7 @@ impl Block { Edge::create( conn, - new_left_block.id, + Some(new_left_block.id), Some(new_right_block.id), chromosome_index, phased, @@ -175,10 +183,31 @@ impl Block { Edge::bulk_update(conn, replacement_edges); - // TODO: Delete existing block? + // TODO: Delete existing block? -- leave to caller atm Some((new_left_block, new_right_block)) } + + pub fn get_blocks(conn: &Connection, query: &str, placeholders: Vec) -> Vec { + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map(params_from_iter(placeholders), |row| { + Ok(Block { + id: row.get(0)?, + sequence_hash: row.get(1)?, + block_group_id: row.get(2)?, + start: row.get(3)?, + end: row.get(4)?, + strand: row.get(5)?, + }) + }) + .unwrap(); + let mut objs = vec![]; + for row in rows { + objs.push(row.unwrap()); + } + objs + } } #[cfg(test)] @@ -249,9 +278,9 @@ mod tests { 8, &"+".to_string(), ); - let edge1 = Edge::create(conn, block1.id, Some(block3.id), 0, 0); - let edge2 = Edge::create(conn, block2.id, Some(block3.id), 0, 0); - Edge::create(conn, block3.id, Some(block4.id), 0, 0); + let edge1 = Edge::create(conn, Some(block1.id), Some(block3.id), 0, 0); + let edge2 = Edge::create(conn, Some(block2.id), Some(block3.id), 0, 0); + Edge::create(conn, Some(block3.id), Some(block4.id), 0, 0); let edges_into_block3 = Block::edges_into(conn, block3.id); assert_eq!(edges_into_block3.len(), 2); @@ -295,7 +324,7 @@ mod tests { 8, &"+".to_string(), ); - Edge::create(conn, block1.id, Some(block2.id), 0, 0); + Edge::create(conn, Some(block1.id), Some(block2.id), 0, 0); let edges_into_block1 = Block::edges_into(conn, block1.id); assert_eq!(edges_into_block1.len(), 0); @@ -351,9 +380,9 @@ mod tests { 8, &"+".to_string(), ); - Edge::create(conn, block1.id, Some(block2.id), 0, 0); - let edge1 = Edge::create(conn, block2.id, Some(block3.id), 0, 0); - let edge2 = Edge::create(conn, block2.id, Some(block4.id), 0, 0); + Edge::create(conn, Some(block1.id), Some(block2.id), 0, 0); + let edge1 = Edge::create(conn, Some(block2.id), Some(block3.id), 0, 0); + let edge2 = Edge::create(conn, Some(block2.id), Some(block4.id), 0, 0); let edges_out_of_block2 = Block::edges_out_of(conn, block2.id); assert_eq!(edges_out_of_block2.len(), 2); @@ -397,7 +426,7 @@ mod tests { 8, &"+".to_string(), ); - Edge::create(conn, block1.id, Some(block2.id), 0, 0); + Edge::create(conn, Some(block1.id), Some(block2.id), 0, 0); let edges_out_of_block2 = Block::edges_out_of(conn, block2.id); assert_eq!(edges_out_of_block2.len(), 0); @@ -453,11 +482,11 @@ mod tests { 8, &"+".to_string(), ); - let edge1 = Edge::create(conn, block1.id, Some(block3.id), 0, 0); - let edge2 = Edge::create(conn, block2.id, Some(block3.id), 0, 0); - let edge3 = Edge::create(conn, block3.id, Some(block4.id), 0, 0); + let edge1 = Edge::create(conn, Some(block1.id), Some(block3.id), 0, 0); + let edge2 = Edge::create(conn, Some(block2.id), Some(block3.id), 0, 0); + let edge3 = Edge::create(conn, Some(block3.id), Some(block4.id), 0, 0); - let (left_block, right_block) = Block::split(conn, block3, 4, 0, 0).unwrap(); + let (left_block, right_block) = Block::split(conn, &block3, 4, 0, 0).unwrap(); let edges_into_left_block = Block::edges_into(conn, left_block.id); assert_eq!(edges_into_left_block.len(), 2); @@ -498,7 +527,7 @@ mod tests { 8, &"+".to_string(), ); - let result = Block::split(conn, block1, -1, 0, 0); + let result = Block::split(conn, &block1, -1, 0, 0); assert!(result.is_none()); let block2 = Block::create( @@ -509,7 +538,7 @@ mod tests { 8, &"+".to_string(), ); - let result = Block::split(conn, block2, 100, 0, 0); + let result = Block::split(conn, &block2, 100, 0, 0); assert!(result.is_none()); } } diff --git a/src/models/edge.rs b/src/models/edge.rs index f54148e..de703cf 100644 --- a/src/models/edge.rs +++ b/src/models/edge.rs @@ -1,9 +1,13 @@ +use std::collections::HashMap; + +use crate::models::Path; +use rusqlite::types::Value; use rusqlite::{params_from_iter, Connection}; #[derive(Debug)] pub struct Edge { pub id: i32, - pub source_id: i32, + pub source_id: Option, pub target_id: Option, pub chromosome_index: i32, pub phased: i32, @@ -12,27 +16,33 @@ pub struct Edge { impl Edge { pub fn create( conn: &Connection, - source_id: i32, + source_id: Option, target_id: Option, chromosome_index: i32, phased: i32, ) -> Edge { - let query; - let id_query; - let mut placeholders = vec![]; - if target_id.is_some() { + let mut query; + let mut id_query; + let mut placeholders: Vec = vec![]; + if target_id.is_some() && source_id.is_some() { query = "INSERT INTO edges (source_id, target_id, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4) RETURNING *"; - id_query = "select id from edges where source_id = ?1 and target_id = ?2"; - placeholders.push(source_id); - placeholders.push(target_id.unwrap()); - placeholders.push(chromosome_index); - placeholders.push(phased); + id_query = "select id from edges where source_id = ?1 and target_id = ?2 and chromosome_index = ?3 and phased = ?4"; + placeholders.push(Value::from(source_id)); + placeholders.push(target_id.unwrap().into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); + } else if target_id.is_some() { + id_query = "select id from edges where target_id = ?1 and source_id is null and chromosome_index = ?2 and phased = ?3"; + query = "INSERT INTO edges (target_id, chromosome_index, phased) VALUES (?1, ?2, ?3) RETURNING *"; + placeholders.push(target_id.into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); } else { id_query = "select id from edges where source_id = ?1 and target_id is null and chromosome_index = ?2 and phased = ?3"; query = "INSERT INTO edges (source_id, chromosome_index, phased) VALUES (?1, ?2, ?3) RETURNING *"; - placeholders.push(source_id); - placeholders.push(chromosome_index); - placeholders.push(phased); + placeholders.push(source_id.into()); + placeholders.push(chromosome_index.into()); + placeholders.push(phased.into()); } let mut stmt = conn.prepare(query).unwrap(); match stmt.query_row(params_from_iter(&placeholders), |row| { @@ -132,7 +142,7 @@ impl Edge { let row = it.next().unwrap(); if row.is_some() { let edge = row.unwrap(); - let source_id: i32 = edge.get(1).unwrap(); + let source_id: Option = edge.get(1).unwrap(); let target_id: Option = edge.get(2).unwrap(); Some(Edge { id: edge.get(0).unwrap(), @@ -145,6 +155,43 @@ impl Edge { None } } + + pub fn get(conn: &Connection, id: i32) -> Edge { + let query = "SELECT * from edges where id = ?1;"; + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map((id,), |row| { + Ok(Edge { + id: row.get(0)?, + source_id: row.get(1)?, + target_id: row.get(2)?, + chromosome_index: row.get(3)?, + phased: row.get(4)?, + }) + }) + .unwrap(); + rows.next().unwrap().unwrap() + } + + pub fn get_edges(conn: &Connection, query: &str, placeholders: Vec) -> Vec { + let mut stmt = conn.prepare_cached(query).unwrap(); + let mut rows = stmt + .query_map(params_from_iter(placeholders), |row| { + Ok(Edge { + id: row.get(0)?, + source_id: row.get(1)?, + target_id: row.get(2)?, + chromosome_index: row.get(3)?, + phased: row.get(4)?, + }) + }) + .unwrap(); + let mut objs = vec![]; + for row in rows { + objs.push(row.unwrap()); + } + objs + } } #[derive(Debug)] diff --git a/src/models/path.rs b/src/models/path.rs new file mode 100644 index 0000000..ca36080 --- /dev/null +++ b/src/models/path.rs @@ -0,0 +1,228 @@ +use petgraph::graphmap::DiGraphMap; +use petgraph::prelude::Dfs; +use petgraph::visit::{IntoNeighborsDirected, NodeCount}; +use petgraph::{Direction, Outgoing}; +use rusqlite::types::Value; +use rusqlite::{params_from_iter, Connection}; +use std::hash::Hash; +use std::iter::from_fn; + +#[derive(Debug)] +pub struct Path { + pub id: i32, + pub name: String, + pub block_group_id: i32, + pub edges: Vec, +} + +impl Path { + pub fn create(conn: &Connection, name: &str, block_group_id: i32, edges: Vec) -> Path { + let query = "INSERT INTO path (name, block_group_id) VALUES (?1, ?2) RETURNING (id)"; + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map((name, block_group_id), |row| { + Ok(Path { + id: row.get(0)?, + name: name.to_string(), + block_group_id, + edges: edges.clone(), + }) + }) + .unwrap(); + let path = rows.next().unwrap().unwrap(); + + for (index, edge) in edges.iter().enumerate() { + let next_edge = edges.get(index + 1); + if let Some(v) = next_edge { + PathEdge::create(conn, path.id, Some(*edge), Some(*v)); + } else { + PathEdge::create(conn, path.id, Some(*edge), None); + } + } + + path + } + + pub fn get(conn: &mut Connection, path_id: i32) -> Path { + let query = "SELECT id, block_group_id, name from path where id = ?1;"; + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map((path_id,), |row| { + Ok(Path { + id: row.get(0)?, + block_group_id: row.get(1)?, + name: row.get(2)?, + edges: PathEdge::get_edges(conn, path_id), + }) + }) + .unwrap(); + rows.next().unwrap().unwrap() + } + + pub fn get_paths(conn: &Connection, query: &str, placeholders: Vec) -> Vec { + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map(params_from_iter(placeholders), |row| { + let path_id = row.get(0).unwrap(); + Ok(Path { + id: path_id, + block_group_id: row.get(1)?, + name: row.get(2)?, + edges: PathEdge::get_edges(conn, path_id), + }) + }) + .unwrap(); + let mut paths = vec![]; + for row in rows { + paths.push(row.unwrap()); + } + paths + } +} + +#[derive(Debug)] +pub struct PathEdge { + pub id: i32, + pub path_id: i32, + pub source_edge_id: Option, + pub target_edge_id: Option, +} + +impl PathEdge { + pub fn create( + conn: &Connection, + path_id: i32, + source_edge_id: Option, + target_edge_id: Option, + ) -> PathEdge { + let query = + "INSERT INTO path_edges (path_id, source_edge_id, target_edge_id) VALUES (?1, ?2, ?3) RETURNING (id)"; + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = stmt + .query_map((path_id, source_edge_id, target_edge_id), |row| { + Ok(PathEdge { + id: row.get(0)?, + path_id, + source_edge_id, + target_edge_id, + }) + }) + .unwrap(); + rows.next().unwrap().unwrap() + } + + pub fn get_edges(conn: &Connection, path_id: i32) -> Vec { + let mut edges = vec![]; + let query = "SELECT source_edge_id, target_edge_id from path_edges where path_id = ?1;"; + let mut stmt = conn.prepare_cached(query).unwrap(); + let mut rows = stmt + .query_map((path_id,), |row| { + let source_id: Option = row.get(0).unwrap(); + let target_id: Option = row.get(1).unwrap(); + Ok((source_id, target_id)) + }) + .unwrap(); + let mut edge_graph = DiGraphMap::new(); + for row in rows { + let (source, target) = row.unwrap(); + if let Some(v) = source { + edge_graph.add_node(v); + } + if let Some(v) = target { + edge_graph.add_node(v); + } + if let Some(source_v) = source { + if let Some(target_v) = target { + edge_graph.add_edge(source_v, target_v, ()); + } + } + } + let mut start_edge = None; + for node in edge_graph.nodes() { + let has_incoming = edge_graph + .neighbors_directed(node, Direction::Incoming) + .next(); + if has_incoming.is_none() { + start_edge = Some(node); + break; + } + } + if start_edge.is_none() { + panic!("No starting edge found in path {path_id}"); + } + let mut dfs = Dfs::new(&edge_graph, start_edge.unwrap()); + while let Some(nx) = dfs.next(&edge_graph) { + edges.push(nx as i32); + } + edges + } + + pub fn edges_to_graph(conn: &Connection, path_id: i32) -> DiGraphMap<(u32), ()> { + let edges = PathEdge::get_edges(conn, path_id); + let edge_str = (*edges) + .iter() + .map(|v| format!("{v}")) + .collect::>() + .join(","); + let query = format!("SELECT source_id, target_id from edges where id IN ({edge_str});"); + let mut stmt = conn.prepare(&query).unwrap(); + let mut rows = stmt + .query_map([], |row| { + let source_id: Option = row.get(0).unwrap(); + let target_id: Option = row.get(1).unwrap(); + Ok((source_id, target_id)) + }) + .unwrap(); + let mut graph = DiGraphMap::new(); + for edge in rows { + let (source, target) = edge.unwrap(); + println!("edg eis {source:?} {target:?}"); + if let Some(source_value) = source { + graph.add_node(source_value); + if let Some(target_value) = target { + graph.add_edge(source_value, target_value, ()); + } + } + if let Some(target_value) = target { + graph.add_node(target_value); + } + } + graph + } +} + +// hacked from https://docs.rs/petgraph/latest/src/petgraph/algo/simple_paths.rs.html#36-102 to support digraphmap +pub fn all_simple_paths( + graph: G, + from: G::NodeId, + to: G::NodeId, +) -> impl Iterator> +where + G: NodeCount, + G: IntoNeighborsDirected, + G::NodeId: Eq + Hash, +{ + // list of visited nodes + let mut visited = vec![from]; + // list of childs of currently exploring path nodes, + // last elem is list of childs of last visited node + let mut stack = vec![graph.neighbors_directed(from, Outgoing)]; + + from_fn(move || { + while let Some(children) = stack.last_mut() { + if let Some(child) = children.next() { + if child == to { + let path = visited.iter().cloned().chain(Some(to)).collect::<_>(); + return Some(path); + } else if !visited.contains(&child) { + visited.push(child); + stack.push(graph.neighbors_directed(child, Outgoing)); + } + } else { + stack.pop(); + visited.pop(); + } + } + None + }) +} diff --git a/src/models/sequence.rs b/src/models/sequence.rs index f6da6f4..cd9fb36 100644 --- a/src/models/sequence.rs +++ b/src/models/sequence.rs @@ -1,4 +1,6 @@ -use rusqlite::Connection; +use crate::models::edge::Edge; +use rusqlite::types::Value; +use rusqlite::{params_from_iter, Connection}; use sha2::{Digest, Sha256}; #[derive(Debug)] @@ -48,4 +50,27 @@ impl Sequence { } obj_hash } + + pub fn get_sequences( + conn: &Connection, + query: &str, + placeholders: Vec, + ) -> Vec { + let mut stmt = conn.prepare_cached(query).unwrap(); + let mut rows = stmt + .query_map(params_from_iter(placeholders), |row| { + Ok(Sequence { + hash: row.get(0).unwrap(), + sequence_type: row.get(1).unwrap(), + sequence: row.get(2).unwrap(), + length: row.get(3).unwrap(), + }) + }) + .unwrap(); + let mut objs = vec![]; + for row in rows { + objs.push(row.unwrap()); + } + objs + } }