diff --git a/Cargo.lock b/Cargo.lock index 80d93e0..8a4c34a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "Inflector" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" - [[package]] name = "addr2line" version = "0.19.0" @@ -64,6 +58,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -107,7 +107,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -208,6 +208,12 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "binary-merge" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab" + [[package]] name = "bindgen" version = "0.65.1" @@ -226,7 +232,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -235,6 +241,18 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "brotli" version = "3.3.4" @@ -287,9 +305,9 @@ checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" [[package]] name = "bytecheck" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13fe11640a23eb24562225322cd3e452b93a3d4091d62fab69c70542fcd17d1f" +checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" dependencies = [ "bytecheck_derive", "ptr_meta", @@ -298,9 +316,9 @@ dependencies = [ [[package]] name = "bytecheck_derive" -version = "0.6.10" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31225543cb46f81a7e224762764f4a6a0f097b1db0b175f69e8065efaa42de5" +checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" dependencies = [ "proc-macro2", "quote", @@ -309,9 +327,9 @@ dependencies = [ [[package]] name = "bytecount" -version = "0.6.3" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +checksum = "d1a12477b7237a01c11a80a51278165f9ba0edd28fa6db00a65ab230320dc58c" [[package]] name = "bytemuck" @@ -321,9 +339,9 @@ checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -352,6 +370,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + [[package]] name = "cc" version = "1.0.79" @@ -384,17 +411,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.24" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", - "num-integer", "num-traits", - "time 0.1.45", "wasm-bindgen", - "winapi", + "windows-targets 0.48.0", ] [[package]] @@ -517,16 +543,6 @@ dependencies = [ "cfg-if 1.0.0", ] -[[package]] -name = "crossbeam-channel" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" -dependencies = [ - "cfg-if 1.0.0", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -540,9 +556,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.14" +version = "0.9.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", "cfg-if 1.0.0", @@ -553,9 +569,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" +checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ "cfg-if 1.0.0", ] @@ -605,7 +621,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -622,7 +638,7 @@ checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -637,9 +653,9 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encoding_rs" @@ -650,6 +666,18 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "enum_dispatch" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f33313078bb8d4d05a2733a94ac4c2d8a0df9a2b84424ebf4f33bfc224a890e" +dependencies = [ + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.29", +] + [[package]] name = "env_logger" version = "0.9.3" @@ -731,19 +759,6 @@ dependencies = [ "miniz_oxide 0.7.1", ] -[[package]] -name = "flume" -version = "0.10.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" -dependencies = [ - "futures-core", - "futures-sink", - "nanorand", - "pin-project", - "spin 0.9.8", -] - [[package]] name = "fnv" version = "1.0.7" @@ -759,6 +774,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures-channel" version = "0.3.28" @@ -844,7 +865,7 @@ dependencies = [ "cfg-if 1.0.0", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] @@ -1107,6 +1128,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "inplace-vec-builder" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf64c2edc8226891a71f127587a2861b132d2b942310843814d5001d99a1d307" +dependencies = [ + "smallvec", +] + [[package]] name = "instant" version = "0.1.12" @@ -1133,6 +1163,15 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -1170,9 +1209,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" dependencies = [ "wasm-bindgen", ] @@ -1191,9 +1230,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.142" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libloading" @@ -1207,9 +1246,9 @@ dependencies = [ [[package]] name = "librocksdb-sys" -version = "0.6.3+6.28.2" +version = "0.11.0+8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "184ce2a189a817be2731070775ad053b6804a340fee05c6686d711db27455917" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" dependencies = [ "bindgen", "bzip2-sys", @@ -1217,14 +1256,15 @@ dependencies = [ "glob", "libc", "libz-sys", + "lz4-sys", "zstd-sys", ] [[package]] name = "libz-sys" -version = "1.1.9" +version = "1.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" +checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" dependencies = [ "cc", "pkg-config", @@ -1258,11 +1298,18 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4-sys" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ - "cfg-if 1.0.0", + "cc", + "libc", ] [[package]] @@ -1301,6 +1348,20 @@ dependencies = [ "sourmash", ] +[[package]] +name = "mastiff-index" +version = "0.1.0" +dependencies = [ + "camino", + "clap", + "env_logger", + "histogram", + "log", + "numsep", + "size", + "sourmash", +] + [[package]] name = "mastiff-server" version = "0.1.0" @@ -1360,11 +1421,20 @@ dependencies = [ "libc", ] +[[package]] +name = "memmap2" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" dependencies = [ "autocfg", ] @@ -1417,7 +1487,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.45.0", ] @@ -1445,15 +1515,6 @@ version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" -[[package]] -name = "nanorand" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" -dependencies = [ - "getrandom", -] - [[package]] name = "needletail" version = "0.4.1" @@ -1596,9 +1657,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "os_info" @@ -1619,25 +1680,27 @@ checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" [[package]] name = "ouroboros" -version = "0.15.6" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1358bd1558bd2a083fed428ffeda486fbfb323e698cdda7794259d592ca72db" +checksum = "1c86de06555b970aec45229b27291b53154f21a5743a163419f4e4c0b065dcde" dependencies = [ "aliasable", "ouroboros_macro", + "static_assertions", ] [[package]] name = "ouroboros_macro" -version = "0.15.6" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f7d21ccd03305a674437ee1248f3ab5d4b1db095cf1caf49f1713ddf61956b7" +checksum = "8cad0c4b129e9696e37cb712b243777b90ef489a0bfaa0ac34e7d9b860e4f134" dependencies = [ - "Inflector", + "heck", + "itertools", "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.29", ] [[package]] @@ -1738,17 +1801,18 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "piz" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58c75d1c00e6d407e283cc66d9d4fd0985ef1703c761520845b93c4f981bfb65" +checksum = "898b071c1938a2c92b95c18708cbf38f2566a01f0ab9dd7bdf4329987e5c2e17" dependencies = [ + "camino", "chrono", "codepage-437", "crc32fast", "flate2", "log", + "memchr", "thiserror", - "twoway", ] [[package]] @@ -1785,12 +1849,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.4" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62" dependencies = [ "proc-macro2", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -1828,9 +1892,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] @@ -1869,13 +1933,19 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.26" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -1908,9 +1978,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" dependencies = [ "either", "rayon-core", @@ -1918,14 +1988,12 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" dependencies = [ - "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "num_cpus", ] [[package]] @@ -2028,9 +2096,9 @@ dependencies = [ [[package]] name = "retain_mut" -version = "0.1.9" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" [[package]] name = "ring" @@ -2049,23 +2117,26 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.41" +version = "0.7.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21499ed91807f07ae081880aabb2ccc0235e9d88011867d984525e9a4c3cfa3e" +checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" dependencies = [ + "bitvec", "bytecheck", "hashbrown", "ptr_meta", "rend", "rkyv_derive", "seahash", + "tinyvec", + "uuid 1.3.2", ] [[package]] name = "rkyv_derive" -version = "0.7.41" +version = "0.7.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac1c672430eb41556291981f45ca900a0239ad007242d1cb4b4167af842db666" +checksum = "b2e06b915b5c230a17d7a736d1e2e63ee753c256a8614ef3f5147b13a4f5541d" dependencies = [ "proc-macro2", "quote", @@ -2074,9 +2145,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.9.0" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd539cab4e32019956fe7e0cf160bb6d4802f4be2b52c4253d76d3bb0f85a5f7" +checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" dependencies = [ "bytemuck", "byteorder", @@ -2085,9 +2156,9 @@ dependencies = [ [[package]] name = "rocksdb" -version = "0.18.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "620f4129485ff1a7128d184bc687470c21c7951b64779ebc9cfdad3dcd920290" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" dependencies = [ "libc", "librocksdb-sys", @@ -2355,36 +2426,36 @@ dependencies = [ "serde", "serde_json", "thiserror", - "time 0.3.20", + "time", "url", "uuid 1.3.2", ] [[package]] name = "serde" -version = "1.0.160" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] name = "serde_json" -version = "1.0.96" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "itoa", "ryu", @@ -2479,26 +2550,28 @@ checksum = "c361f1b577b7528df46d04ee4d800a2fb4eafd44f187940789e8ab7fd378b509" [[package]] name = "sourmash" version = "0.12.0" -source = "git+https://github.com/sourmash-bio/sourmash?tag=mastiff_roaring#2819307a768469ed8d6bd15f8e17889edfcf81b6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760c7b049cc70294122c44c4e6d0922ed0e79a8e04f2d739b98a982027a9fd4a" dependencies = [ "az", - "bytecount", "byteorder", + "camino", "cfg-if 1.0.0", + "chrono", "counter", "csv", + "enum_dispatch", "fixedbitset", - "flume", + "getrandom", "getset", "histogram", "log", "md5", - "memmap2", + "memmap2 0.9.0", "murmurhash3", "niffler", "nohash-hasher", "num-iter", - "numsep", "once_cell", "ouroboros", "piz", @@ -2509,7 +2582,6 @@ dependencies = [ "rocksdb", "serde", "serde_json", - "size", "thiserror", "twox-hash", "typed-builder", @@ -2529,9 +2601,6 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] [[package]] name = "stable_deref_trait" @@ -2558,7 +2627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737" dependencies = [ "debugid", - "memmap2", + "memmap2 0.5.10", "stable_deref_trait", "uuid 1.3.2", ] @@ -2586,9 +2655,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.15" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ "proc-macro2", "quote", @@ -2611,6 +2680,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.5.0" @@ -2656,7 +2731,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -2669,17 +2744,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "time" version = "0.3.20" @@ -2749,7 +2813,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -2854,7 +2918,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.29", ] [[package]] @@ -2925,32 +2989,22 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" -[[package]] -name = "twoway" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c57ffb460d7c24cd6eda43694110189030a3d1dfe418416d9468fd1c1d290b47" -dependencies = [ - "memchr", - "unchecked-index", -] - [[package]] name = "twox-hash" version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 1.0.0", + "cfg-if 0.1.10", "rand", "static_assertions", ] [[package]] name = "typed-builder" -version = "0.10.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" +checksum = "64cba322cb9b7bc6ca048de49e83918223f35e7a86311267013afff257004870" dependencies = [ "proc-macro2", "quote", @@ -2972,12 +3026,6 @@ dependencies = [ "libc", ] -[[package]] -name = "unchecked-index" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" - [[package]] name = "unicase" version = "2.6.0" @@ -3077,10 +3125,13 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vec-collections" -version = "0.3.6" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2390c4dc8ae8640c57d067b1a3d40bc05c124cc6bc7394d761b53435d41b76" +checksum = "3c9965c8f2ffed1dbcd16cafe18a009642f540fa22661c6cfd6309ddb02e4982" dependencies = [ + "binary-merge", + "inplace-vec-builder", + "lazy_static", "num-traits", "serde", "smallvec", @@ -3109,12 +3160,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3123,9 +3168,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" dependencies = [ "cfg-if 1.0.0", "serde", @@ -3135,16 +3180,16 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.29", "wasm-bindgen-shared", ] @@ -3162,9 +3207,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3172,28 +3217,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.29", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f" dependencies = [ "js-sys", "wasm-bindgen", @@ -3399,6 +3444,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xz2" version = "0.1.7" @@ -3410,10 +3464,11 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.3+zstd.1.5.2" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", + "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index 2612056..15817dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,21 +1,26 @@ [workspace] -members = ["crates/server", "crates/client"] +members = ["crates/*"] default-members = ["crates/server"] +resolver = "2" [workspace.package] version = "0.1.0" name = "mastiff" [workspace.dependencies] +camino = "1.1.6" clap = { version = "3.2.8", features = [ "derive" ] } color-eyre = "0.6.2" csv = "1.1.6" env_logger = "0.9.0" +histogram = "0.6.9" log = "0.4.17" needletail = "0.4.1" niffler = { version = "2.4.0", default-features = false, features = [ "gz" ]} +numsep = "0.1.12" reqwest = { version = "0.11.11", default-features = false, features = [ "blocking", "rustls-tls" ] } -sourmash = { git = "https://github.com/sourmash-bio/sourmash", tag = "mastiff_roaring" } +size = "0.4.0" +sourmash = { version = "0.12.0", features = ["branchwater"] } serde_json = "1.0.83" # axum deps diff --git a/crates/client/src/main.rs b/crates/client/src/main.rs index 0a4fc82..a45ec89 100644 --- a/crates/client/src/main.rs +++ b/crates/client/src/main.rs @@ -107,7 +107,7 @@ fn main() -> Result<()> { let mut sigs = Signature::load_signatures( &mut reader, Some(21), - Some(HashFunctions::murmur64_DNA), + Some(HashFunctions::Murmur64Dna), Some(1000), )?; diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml new file mode 100644 index 0000000..bc91f83 --- /dev/null +++ b/crates/index/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "mastiff-index" +version.workspace = true +edition = "2021" +license = "AGPL" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +camino.workspace = true +clap.workspace = true +env_logger.workspace = true +histogram.workspace = true +log.workspace = true +numsep.workspace = true +size.workspace = true +sourmash.workspace = true diff --git a/crates/index/src/main.rs b/crates/index/src/main.rs new file mode 100644 index 0000000..3c4b5cf --- /dev/null +++ b/crates/index/src/main.rs @@ -0,0 +1,559 @@ +use camino::Utf8Path as Path; +use camino::Utf8PathBuf as PathBuf; +use clap::{Parser, Subcommand}; +use log::info; + +use sourmash::collection::Collection; +use sourmash::index::revindex::{prepare_query, RevIndex, RevIndexOps}; +use sourmash::manifest::Manifest; +use sourmash::prelude::*; +use sourmash::signature::{Signature, SigsTrait}; +use sourmash::storage::{FSStorage, InnerStorage, ZipStorage}; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Cli { + #[clap(subcommand)] + command: Commands, +} + +#[derive(Subcommand, Debug)] +enum Commands { + Index { + /// Location of the input data. + /// Either a zip file or a path to a directory containing signatures. + location: PathBuf, + + /// Manifest for sigs to be loaded from storage + #[clap(short, long)] + manifest: Option, + + /// ksize + #[clap(short, long, default_value = "31")] + ksize: u8, + + /// scaled + #[clap(short, long, default_value = "1000")] + scaled: usize, + + /// The path for output + #[clap(short, long)] + output: PathBuf, + + /// Index using colors + #[clap(long = "colors")] + colors: bool, + }, + Update { + /// Location of the input data. + /// Either a zip file or a path to a directory containing signatures. + location: PathBuf, + + /// Manifest for sigs to be loaded from storage + #[clap(short, long)] + manifest: Option, + + /// ksize + #[clap(short, long, default_value = "31")] + ksize: u8, + + /// scaled + #[clap(short, long, default_value = "1000")] + scaled: usize, + + /// The path for output + #[clap(short, long)] + output: PathBuf, + }, + /* TODO: need the repair_cf variant, not available in rocksdb-rust yet + Repair { + /// The path for DB to repair + #[clap(parse(from_os_str))] + index: PathBuf, + + /// Repair using colors + #[clap(long = "colors")] + colors: bool, + }, + */ + Manifest { + /// File with list of paths to signatures + pathlist: PathBuf, + + /// ksize + #[clap(short, long)] + ksize: Option, + + /// Path for future FSStorage. + /// + /// Will be removed from a record internal location in manifest. + #[clap(short, long)] + basepath: Option, + + /// The path for output + #[clap(short, long)] + output: Option, + }, + Check { + /// The path for output + output: PathBuf, + + /// avoid deserializing data, and without stats + #[clap(long = "quick")] + quick: bool, + }, + Convert { + /// The path for the input DB + input: PathBuf, + + /// The path for the output DB + output: PathBuf, + }, + Search { + /// Query signature + query_path: PathBuf, + + /// Path to rocksdb index dir + index: PathBuf, + + /// ksize + #[clap(short = 'k', long = "ksize", default_value = "31")] + ksize: u8, + + /// scaled + #[clap(short = 's', long = "scaled", default_value = "1000")] + scaled: usize, + + /// threshold_bp + #[clap(short = 't', long = "threshold_bp", default_value = "50000")] + threshold_bp: usize, + + /// minimum containment to report + #[clap(short = 'c', long = "containment", default_value = "0.2")] + containment: f64, + + /// The path for output + #[clap(short = 'o', long = "output")] + output: Option, + }, + Gather { + /// Query signature + query_path: PathBuf, + + /// Path to rocksdb index dir + index: PathBuf, + + /// ksize + #[clap(short = 'k', long = "ksize", default_value = "31")] + ksize: u8, + + /// scaled + #[clap(short = 's', long = "scaled", default_value = "1000")] + scaled: usize, + + /// threshold_bp + #[clap(short = 't', long = "threshold_bp", default_value = "50000")] + threshold_bp: usize, + + /// The path for output + #[clap(short = 'o', long = "output")] + output: Option, + }, +} + +fn gather>( + queries_file: P, + index: P, + selection: Selection, + threshold_bp: usize, + _output: Option

, +) -> Result<(), Box> { + let query_sig = Signature::from_path(queries_file.as_ref())? + .swap_remove(0) + .select(&selection)?; + + let mut query = None; + if let Some(q) = prepare_query(query_sig, &selection) { + query = Some(q); + } + let query = query.expect("Couldn't find a compatible MinHash"); + + let threshold = threshold_bp / query.scaled() as usize; + + let db = RevIndex::open(index.as_ref(), true)?; + info!("Loaded DB"); + + info!("Building counter"); + let (counter, query_colors, hash_to_color) = db.prepare_gather_counters(&query); + // TODO: truncate on threshold? + info!("Counter built"); + + let matches = db.gather( + counter, + query_colors, + hash_to_color, + threshold, + &query, + Some(selection), + )?; + + info!("matches: {}", matches.len()); + for match_ in matches { + println!( + "{} {} {}", + match_.name(), + match_.intersect_bp(), + match_.f_match() + ) + } + + Ok(()) +} + +fn search>( + queries_file: P, + index: P, + selection: Selection, + threshold_bp: usize, + minimum_containment: f64, + _output: Option

, +) -> Result<(), Box> { + let query_sig = Signature::from_path(queries_file.as_ref())? + .swap_remove(0) + .select(&selection)?; + + let mut query = None; + if let Some(q) = prepare_query(query_sig, &selection) { + query = Some(q); + } + let query = query.expect("Couldn't find a compatible MinHash"); + let query_size = query.size() as f64; + + let threshold = threshold_bp / query.scaled() as usize; + + let db = RevIndex::open(index.as_ref(), true)?; + info!("Loaded DB"); + + info!("Building counter"); + let counter = db.counter_for_query(&query); + info!("Counter built"); + + let matches = db.matches_from_counter(counter, threshold); + + //info!("matches: {}", matches.len()); + println!("SRA ID,containment"); + matches + .into_iter() + .filter_map(|(path, size)| { + let containment = size as f64 / query_size; + if containment >= minimum_containment { + println!( + "{},{}", + path.split("/").last().unwrap().split(".").next().unwrap(), + containment + ); + Some(()) + } else { + None + } + }) + .count(); + + Ok(()) +} + +fn index>( + location: P, + manifest: Option

, + selection: Selection, + output: P, + colors: bool, +) -> Result<(), Box> { + let manifest = if let Some(m) = manifest { + let rdr = std::fs::OpenOptions::new().read(true).open(m.as_ref())?; + Some(Manifest::from_reader(rdr)?) + } else { + None + }; + + let collection = if matches!(location.as_ref().extension(), Some("zip")) { + if let Some(m) = manifest { + let storage = ZipStorage::from_file(location)?; + Collection::new(m, InnerStorage::new(storage)) + } else { + Collection::from_zipfile(location)? + } + } else { + let manifest = manifest.ok_or_else(|| "Need a manifest")?; + assert!(location.as_ref().exists()); + assert!(location.as_ref().is_dir()); + let storage = FSStorage::builder() + .fullpath(location.as_ref().into()) + .subdir("".into()) + .build(); + Collection::new(manifest, InnerStorage::new(storage)) + }; + + RevIndex::create( + output.as_ref(), + collection.select(&selection)?.try_into()?, + colors, + )?; + + Ok(()) +} + +fn update>( + location: P, + manifest: Option

, + selection: Selection, + output: P, +) -> Result<(), Box> { + let manifest = if let Some(m) = manifest { + let rdr = std::fs::OpenOptions::new().read(true).open(m.as_ref())?; + Some(Manifest::from_reader(rdr)?) + } else { + None + }; + + let collection = if matches!(location.as_ref().extension(), Some("zip")) { + if let Some(m) = manifest { + let storage = ZipStorage::from_file(location)?; + Collection::new(m, InnerStorage::new(storage)) + } else { + Collection::from_zipfile(location)? + } + } else { + let manifest = manifest.ok_or_else(|| "Need a manifest")?; + assert!(location.as_ref().exists()); + assert!(location.as_ref().is_dir()); + let storage = FSStorage::builder() + .fullpath(location.as_ref().into()) + .subdir("".into()) + .build(); + Collection::new(manifest, InnerStorage::new(storage)) + }; + + let db = RevIndex::open(output.as_ref(), false)?; + db.update(collection.select(&selection)?.try_into()?)?; + + Ok(()) +} + +fn convert>(_input: P, _output: P) -> Result<(), Box> { + todo!() + /* + info!("Opening input DB"); + let db = RevIndex::open(input.as_ref(), true); + + info!("Creating output DB"); + let output_db = RevIndex::create(output.as_ref(), true); + + info!("Converting input DB"); + db.convert(output_db)?; + + info!("Finished conversion"); + Ok(()) + */ +} + +fn manifest>( + pathlist: P, + output: Option

, + selection: Option, + basepath: Option

, +) -> Result<(), Box> { + use std::fs::File; + use std::io::{BufRead, BufReader, BufWriter, Write}; + + let paths: Vec = BufReader::new(File::open(pathlist.as_ref())?) + .lines() + .map(|line| { + let mut path = PathBuf::new(); + path.push(line.unwrap()); + path + }) + .collect(); + + let manifest: Manifest = paths.as_slice().into(); + + let manifest = if let Some(selection) = selection { + manifest.select(&selection)? + } else { + manifest + }; + + let manifest = if let Some(basepath) = basepath { + let path: &str = basepath.as_ref().as_str(); + manifest + .iter() + .map(|r| { + let mut record = r.clone(); + record.set_internal_location( + r.internal_location() + .strip_prefix(path) + .expect("Error stripping") + .into(), + ); + record + }) + .collect::>() + .into() + } else { + manifest + }; + + let out: Box = match output { + Some(path) => Box::new(BufWriter::new(File::create(path.as_ref()).unwrap())), + None => Box::new(std::io::stdout()), + }; + + manifest.to_writer(out)?; + + Ok(()) +} + +fn check>(output: P, quick: bool) -> Result<(), Box> { + use numsep::{separate, Locale}; + use size::Size; + + info!("Opening DB"); + let db = RevIndex::open(output.as_ref(), true)?; + + info!("Starting check"); + let stats = db.check(quick); + + let kcount = *stats.kcount(); + let vcount = *stats.vcount(); + let vcounts = stats.vcounts(); + + //info!("*** {} ***", cf_name); + let ksize = Size::from_bytes(kcount); + let vsize = Size::from_bytes(vcount); + if !quick { + info!( + "total datasets: {}", + separate(stats.total_datasets(), Locale::English) + ); + } + info!( + "total keys: {}", + separate(stats.kcount() / 8, Locale::English) + ); + + info!("k: {}", ksize.to_string()); + info!("v: {}", vsize.to_string()); + + if !quick && kcount > 0 { + info!("max v: {}", vcounts.maximum().unwrap()); + info!("mean v: {}", vcounts.mean().unwrap()); + info!("stddev: {}", vcounts.stddev().unwrap()); + info!("median v: {}", vcounts.percentile(50.0).unwrap()); + info!("p25 v: {}", vcounts.percentile(25.0).unwrap()); + info!("p75 v: {}", vcounts.percentile(75.0).unwrap()); + } + + info!("Finished check"); + Ok(()) +} + +/* TODO: need the repair_cf variant, not available in rocksdb-rust yet +fn repair>(output: P, colors: bool) { + info!("Starting repair"); + RevIndex::repair(output.as_ref(), colors); + info!("Finished repair"); +} +*/ + +fn main() -> Result<(), Box> { + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); + use Commands::*; + + let opts = Cli::parse(); + + match opts.command { + Index { + output, + location, + manifest, + ksize, + scaled, + colors, + } => { + let selection = Selection::builder() + .ksize(ksize.into()) + .scaled(scaled as u32) + .build(); + + index(location, manifest, selection, output, colors)? + } + Update { + output, + location, + manifest, + ksize, + scaled, + } => { + let selection = Selection::builder() + .ksize(ksize.into()) + .scaled(scaled as u32) + .build(); + + update(location, manifest, selection, output)? + } + Check { output, quick } => check(output, quick)?, + Convert { input, output } => convert(input, output)?, + Manifest { + pathlist, + output, + ksize, + basepath, + } => { + let selection = ksize.map(|ksize| Selection::builder().ksize(ksize.into()).build()); + + manifest(pathlist, output, selection, basepath)? + } + Search { + query_path, + output, + index, + threshold_bp, + ksize, + scaled, + containment, + } => { + let selection = Selection::builder() + .ksize(ksize.into()) + .scaled(scaled as u32) + .build(); + + search( + query_path, + index, + selection, + threshold_bp, + containment, + output, + )? + } + Gather { + query_path, + output, + index, + threshold_bp, + ksize, + scaled, + } => { + let selection = Selection::builder() + .ksize(ksize.into()) + .scaled(scaled as u32) + .build(); + + gather(query_path, index, selection, threshold_bp, output)? + } /* TODO: need the repair_cf variant, not available in rocksdb-rust yet + Repair { index, colors } => repair(index, colors), + */ + }; + + Ok(()) +} diff --git a/crates/server/src/main.rs b/crates/server/src/main.rs index 76295bb..d9a09ad 100644 --- a/crates/server/src/main.rs +++ b/crates/server/src/main.rs @@ -6,7 +6,7 @@ use axum::{ extract::{ContentLengthLimit, Extension}, http::{header, StatusCode}, response::{IntoResponse, Response}, - routing::{get_service, post}, + routing::{get, get_service, post}, Router, }; use sentry::integrations::tower::{NewSentryLayer, SentryHttpLayer}; @@ -18,7 +18,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use clap::Parser; use color_eyre::eyre::Result; -use sourmash::index::revindex::RevIndex; +use sourmash::index::revindex::{RevIndex, RevIndexOps}; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::{max_hash_for_scaled, KmerMinHash}; use sourmash::sketch::Sketch; @@ -93,7 +93,7 @@ fn main() -> Result<()> { let threshold = opts.threshold_bp / mh.scaled() as usize; let state = Arc::new(State { - db: Arc::new(RevIndex::open(opts.index.as_ref(), true)), + db: Arc::new(RevIndex::open(opts.index, true).expect("Error opening DB")), template: Arc::new(Sketch::MinHash(mh)), threshold, }); @@ -101,6 +101,7 @@ fn main() -> Result<()> { // Build our application by composing routes let app = Router::new() .route("/search", post(search)) + .route("/health", get(health)) //.route("/gather", post(gather)) .fallback(get_service(ServeDir::new(opts.assets)).handle_error(handle_static_serve_error)) // Add middleware to all routes @@ -159,8 +160,9 @@ impl State { Err("Could not extract compatible sketch to compare") } }) - .await? else { - return Err("Could not extract compatible sketch to compare".into()) + .await? + else { + return Err("Could not extract compatible sketch to compare".into()); }; let mut csv = vec!["SRA accession,containment".into()]; @@ -222,6 +224,10 @@ async fn search( } } +async fn health() -> Response { + (StatusCode::OK, "I'm doing science and I'm still alive").into_response() +} + async fn handle_static_serve_error(error: std::io::Error) -> impl IntoResponse { ( StatusCode::INTERNAL_SERVER_ERROR, diff --git a/experiments/Snakefile b/experiments/Snakefile new file mode 100644 index 0000000..6729262 --- /dev/null +++ b/experiments/Snakefile @@ -0,0 +1,193 @@ +EXEC="cargo run -p mastiff-index --release -- " + +rule all: +# input: "outputs/rs207" +# input: "outputs/rs207-2k" + input: expand("outputs/genbank-{domain}", domain=["archaea", "bacteria", "fungi", "protozoa", "viral"]) + +rule rs207_1k: + output: directory("outputs/rs207-1k") + input: + storage="inputs/gtdb-rs207.genomic-reps.dna.k21.zip", + manifest="inputs/gtdb-rs207.genomic-reps.dna.k21.manifest" + threads: 24 + shell: """ + export RAYON_NUM_THREADS={threads} + RUST_LOG=info {EXEC} index -k 21 -s 1000 \ + --output {output} \ + --manifest <(head -1002 {input.manifest}) \ + {input.storage} + """ + +rule rs207_2k: + output: directory("outputs/rs207-2k") + input: + previous="outputs/rs207-1k", + storage="inputs/gtdb-rs207.genomic-reps.dna.k21.zip", + manifest="inputs/gtdb-rs207.genomic-reps.dna.k21.manifest", + threads: 24 + shell: """ + export RAYON_NUM_THREADS={threads} + cp -a {input.previous} {output} + {EXEC} update -k 21 -s 1000 \ + --output {output} \ + --manifest <(head -2002 {input.manifest}) \ + {input.storage} + """ + +rule rs_207: + output: directory("outputs/rs207") + input: "inputs/gtdb-rs207.genomic-reps.dna.k21.zip" + shell: """ + {EXEC} index -k 21 -s 1000 \ + --output {output} \ + {input} + """ + +rule download_rs_207: + output: "inputs/gtdb-rs207.genomic-reps.dna.k21.zip" + shell: """ + curl -L https://osf.io/download/f2wzc/ -o {output} + """ + +rule extract_rs_207_manifest: + output: "inputs/gtdb-rs207.genomic-reps.dna.k21.manifest" + input: "inputs/gtdb-rs207.genomic-reps.dna.k21.zip" + shell: """ + unzip -p {input} SOURMASH-MANIFEST.csv > {output} + """ + +####################################################################### + +rule metag_100: + output: directory("outputs/metag-100") + input: + storage="/data/wort/wort-sra", + manifest="outputs/metagenomes-k21-s1000.manifest", + threads: 24 + shell: """ + export RAYON_NUM_THREADS={threads} + RUST_LOG=info {EXEC} index -k 21 -s 1000 \ + --output {output} \ + --manifest <(head -102 {input.manifest}) \ + {input.storage} + """ + +rule metag_200: + output: directory("outputs/metag-200") + input: + previous="outputs/metag-100", + storage="/data/wort/wort-sra", + manifest="outputs/metagenomes-k21-s1000.manifest", + threads: 24 + shell: """ + export RAYON_NUM_THREADS={threads} + cp -a {input.previous} {output} + {EXEC} update -k 21 -s 1000 \ + --output {output} \ + --manifest <(head -202 {input.manifest}) \ + {input.storage} + """ + +rule metag: + output: directory("outputs/metag") + input: + storage="/data/wort/wort-sra", + manifest="outputs/metagenomes-k21-s1000.manifest", + threads: 24 + shell: """ + export RAYON_NUM_THREADS={threads} + RUST_LOG=info {EXEC} index -k 21 -s 1000 \ + --output {output} \ + --manifest {input.manifest} \ + {input.storage} + """ + +####################################################################### + +rule manifest_test: + output: + manifest="inputs/metagenomes-k{ksize}-s1000-test.manifest", + input: + catalog="outputs/metagenomes-catalog", + threads: 24, + shell: """ + export RAYON_NUM_THREADS={threads} + RUST_LOG=info {EXEC} manifest -k 21 \ + --output {output} \ + --basepath /data/wort/wort-sra \ + <(head -2000 {input.catalog}) + """ + +####################################################################### + +rule manifest_from_catalog: + output: + manifest="outputs/metagenomes-k{ksize}-s1000.manifest", + input: + catalog="outputs/metagenomes-catalog", + threads: 24, + shell: """ + export RAYON_NUM_THREADS={threads} + RUST_LOG=info {EXEC} manifest -k 21 \ + --output {output} \ + --basepath /data/wort/wort-sra \ + {input.catalog} + """ + +rule catalog_metagenomes: + output: + catalog="outputs/metagenomes-catalog", + input: + runinfo="runinfo-20230817.csv", + basepath="/data/wort/wort-sra/" + run: + import csv + from pathlib import Path + + # load all sra IDs + sraids = set() + with open(input.runinfo) as fp: + data = csv.DictReader(fp, delimiter=",") + for dataset in data: + if dataset['Run'] != 'Run': + sraids.add(dataset['Run']) + + path = Path(input.basepath) + with open(output.catalog, 'w') as out: + # check if sraids exist on disk + for sra_id in sraids: + sig_path = path / "sigs" / f"{sra_id}.sig" + if sig_path.exists(): + out.write(f"{sig_path}\n") + out.flush() + +####################################################################### + +rule genbank: + output: directory("outputs/genbank-{domain}") + input: "/data/wort/databases/genbank-2022.03-{domain}-k21.zip" + + shell: """ + {EXEC} index -k 21 -s 1000 \ + --output {output} \ + {input} + """ + +####################################################################### + +""" +{EXEC} index -k 21 -s 1000 --output /scratch/analysis/rocksdb_metagenomes catalog_metagenomes +{EXEC} index -k 21 -s 10000 --output /scratch/analysis/rocksdb_metagenomes2 <(cat catalog_metagenomes | head 1000) +{EXEC} index -k 21 -s 1000 --output /scratch/analysis/rocksdb_metagenomes2 <(cat catalog_metagenomes | head -n 10) +{EXEC} index -k 21 -s 10000 --output /scratch/analysis/rocksdb_metagenomes2 <(cat catalog_metagenomes | head -n 10) +{EXEC} index -k 21 -s 10000 --output /scratch/analysis/rocksdb_metagenomes2 <(cat catalog_metagenomes | head -n 1000) +{EXEC} index -k 51 -s 1000 --output bacteria-100k-enum flist +{EXEC} index -k 51 -s 1000 --output bacteria-100k-cf <(head -1 flist) +{EXEC} index -k 51 -s 1000 --output bacteria-1k-cf (head -1000 flist) +{EXEC} index -k 51 -s 1000 --output bacteria-1k-cf $(head -1000 flist) +{EXEC} index -k 51 -s 1000 --output bacteria-10k-cf <(head -10000 flist) +{EXEC} index -k 51 -s 1000 --output bacteria-100k-cf flist +{EXEC} index -k 51 -s 1000 --output bacteria-1k-cf-opts <(head -1000 flist) +{EXEC} index -k 51 -s 1000 --output bacteria-100k-cf-opts flist +""" diff --git a/flake.lock b/flake.lock index 35c718a..9e04d57 100644 --- a/flake.lock +++ b/flake.lock @@ -2,23 +2,16 @@ "nodes": { "crane": { "inputs": { - "flake-compat": "flake-compat", - "flake-utils": [ - "flake-utils" - ], "nixpkgs": [ "nixpkgs" - ], - "rust-overlay": [ - "rust-overlay" ] }, "locked": { - "lastModified": 1683505101, - "narHash": "sha256-VBU64Jfu2V4sUR5+tuQS9erBRAe/QEYUxdVMcJGMZZs=", + "lastModified": 1701622587, + "narHash": "sha256-o3XhxCCyrUHZ0tlta2W7/MuXzy+n0+BUt3rKFK3DIK4=", "owner": "ipetkov", "repo": "crane", - "rev": "7b5bd9e5acb2bb0cfba2d65f34d8568a894cdb6c", + "rev": "c09d2cbe84cc2adfe1943cb2a0b55a71c835ca9a", "type": "github" }, "original": { @@ -27,32 +20,16 @@ "type": "github" } }, - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1673956053, - "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, "flake-utils": { "inputs": { "systems": "systems" }, "locked": { - "lastModified": 1681202837, - "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=", + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", "owner": "numtide", "repo": "flake-utils", - "rev": "cfacdce06f30d2b68473a46042957675eebb3401", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", "type": "github" }, "original": { @@ -63,11 +40,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1683777345, - "narHash": "sha256-V2p/A4RpEGqEZussOnHYMU6XglxBJGCODdzoyvcwig8=", + "lastModified": 1701432845, + "narHash": "sha256-06sd2rQ+DPMSueh+hW4MiXbpMSdhQHJOi/sw0vuwqvs=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "635a306fc8ede2e34cb3dd0d6d0a5d49362150ed", + "rev": "77da99a144cd341408308e0a37622f5edcc6c5ba", "type": "github" }, "original": { @@ -95,11 +72,11 @@ ] }, "locked": { - "lastModified": 1683857898, - "narHash": "sha256-pyVY4UxM6zUX97g6bk6UyCbZGCWZb2Zykrne8YxacRA=", + "lastModified": 1701569797, + "narHash": "sha256-ObvQFAPpC5IVbI2GHedSTQVzYxht2qhBgHHQnh3mYTs=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "4e7fba3f37f5e184ada0ef3cf1e4d8ef450f240b", + "rev": "516c9477757b628b157780d96d84e8c82b46dc99", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index a7a6a38..64876a4 100644 --- a/flake.nix +++ b/flake.nix @@ -7,8 +7,6 @@ crane = { url = "github:ipetkov/crane"; inputs.nixpkgs.follows = "nixpkgs"; - inputs.rust-overlay.follows = "rust-overlay"; - inputs.flake-utils.follows = "flake-utils"; }; flake-utils.url = "github:numtide/flake-utils"; @@ -146,15 +144,19 @@ buildInputs = with pkgs; [ oha - awscli2 + #awscli2 rclone - terraform nixpkgs-fmt asciinema asciinema-agg cargo-udeps cargo-outdated + cargo-watch + cargo-limit + + snakemake + parallel-full ]; }); });