From 649bfdde1720cbf29aef25bb11ab91f406f52444 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 23 Jun 2024 17:10:23 -0700 Subject: [PATCH 01/26] (chore) removed plotting and variable float compilation (feat) added detection of diagonal pasef --- Cargo.lock | 7423 +++----------------------- Cargo.toml | 14 +- src/aggregation/dbscan.rs | 15 +- src/aggregation/mod.rs | 3 +- src/aggregation/ms_denoise.rs | 78 +- src/aggregation/trace_combination.rs | 20 - src/aggregation/tracing.rs | 107 +- src/lib.rs | 2 - src/main.rs | 12 +- src/mod_types.rs | 10 - src/ms/frames.rs | 144 +- src/ms/tdf.rs | 25 +- src/space/kdtree.rs | 18 +- src/space/quad.rs | 6 +- src/space/space_generics.rs | 26 +- src/visualization.rs | 24 - 16 files changed, 850 insertions(+), 7077 deletions(-) delete mode 100644 src/aggregation/trace_combination.rs delete mode 100644 src/mod_types.rs delete mode 100644 src/visualization.rs diff --git a/Cargo.lock b/Cargo.lock index 828d727..9e73482 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,118 +2,12 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "ab_glyph" -version = "0.2.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e53b0a3d5760cd2ba9b787ae0c6440ad18ee294ff71b05e3381c900a7d16cfd" -dependencies = [ - "ab_glyph_rasterizer", - "owned_ttf_parser", -] - -[[package]] -name = "ab_glyph_rasterizer" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c71b1793ee61086797f5c80b6efa2b8ffa6d5dd703f118545808a7f2e27f7046" - -[[package]] -name = "accesskit" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74a4b14f3d99c1255dcba8f45621ab1a2e7540a0009652d33989005a4d0bfc6b" -dependencies = [ - "enumn", - "serde", -] - -[[package]] -name = "accesskit_consumer" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c17cca53c09fbd7288667b22a201274b9becaa27f0b91bf52a526db95de45e6" -dependencies = [ - "accesskit", -] - -[[package]] -name = "accesskit_macos" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3b6ae1eabbfbced10e840fd3fce8a93ae84f174b3e4ba892ab7bcb42e477a7" -dependencies = [ - "accesskit", - "accesskit_consumer", - "objc2 0.3.0-beta.3.patch-leaks.3", - "once_cell", -] - -[[package]] -name = "accesskit_unix" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f46c18d99ba61ad7123dd13eeb0c104436ab6af1df6a1cd8c11054ed394a08" -dependencies = [ - "accesskit", - "accesskit_consumer", - "async-channel", - "async-once-cell", - "atspi", - "futures-lite 1.13.0", - "once_cell", - "serde", - "zbus", -] - -[[package]] -name = "accesskit_windows" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afcae27ec0974fc7c3b0b318783be89fd1b2e66dd702179fe600166a38ff4a0b" -dependencies = [ - "accesskit", - "accesskit_consumer", - "once_cell", - "paste", - "static_assertions", - "windows 
0.48.0", -] - -[[package]] -name = "accesskit_winit" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88e39fcec2e10971e188730b7a76bab60647dacc973d4591855ebebcadfaa738" -dependencies = [ - "accesskit", - "accesskit_macos", - "accesskit_unix", - "accesskit_windows", - "winit", -] - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli 0.28.1", -] - [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" -[[package]] -name = "adler32" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" - [[package]] name = "ahash" version = "0.8.11" @@ -124,7 +18,6 @@ dependencies = [ "const-random", "getrandom", "once_cell", - "serde", "version_check", "zerocopy", ] @@ -159,30 +52,6 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" -[[package]] -name = "android-activity" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64529721f27c2314ced0890ce45e469574a73e5e6fdd6e9da1860eb29285f5e0" -dependencies = [ - "android-properties", - "bitflags 1.3.2", - "cc", - "jni-sys", - "libc", - "log", - "ndk", - "ndk-context", - "ndk-sys", - "num_enum 0.6.1", -] - -[[package]] -name = "android-properties" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7eb209b1518d6bb87b283c20095f5228ecda460da70b44f0802523dea6da04" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -230,11 +99,11 @@ dependencies = [ [[package]] name = 
"anstyle-query" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -244,74 +113,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" dependencies = [ "anstyle", - "windows-sys 0.52.0", -] - -[[package]] -name = "anyhow" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" - -[[package]] -name = "apache-avro" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceb7c683b2f8f40970b70e39ff8be514c95b96fcb9c4af87e1ed2cb2e10801a0" -dependencies = [ - "digest", - "lazy_static", - "libflate", - "log", - "num-bigint", - "quad-rand", - "rand", - "regex-lite", - "serde", - "serde_json", - "strum 0.25.0", - "strum_macros 0.25.3", - "thiserror", - "typed-builder", - "uuid", -] - -[[package]] -name = "arboard" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb4009533e8ff8f1450a5bcbc30f4242a1d34442221f72314bea1f5dc9c7f89" -dependencies = [ - "clipboard-win", - "core-graphics 0.23.2", - "image 0.25.1", - "log", - "objc2 0.5.2", - "objc2-app-kit", - "objc2-foundation", - "parking_lot", - "windows-sys 0.48.0", - "x11rb", + "windows-sys", ] -[[package]] -name = "array-init" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" - -[[package]] -name = "array-init-cursor" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" - -[[package]] -name = "arrayvec" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" - [[package]] name = "arrow-array" version = "42.0.0" @@ -324,7 +128,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "hashbrown 0.14.5", + "hashbrown", "num", ] @@ -367,16 +171,6 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-format" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07884ea216994cdc32a2d5f8274a8bee979cfe90274b83f86f440866ee3132c7" -dependencies = [ - "planus", - "serde", -] - [[package]] name = "arrow-ipc" version = "42.0.0" @@ -411,6598 +205,1335 @@ dependencies = [ ] [[package]] -name = "arrow2" -version = "0.17.4" +name = "atty" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59c468daea140b747d781a1da9f7db5f0a8e6636d4af20cc539e43d05b0604fa" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "ahash", - "arrow-format", - "bytemuck", - "chrono", - "comfy-table", - "dyn-clone", - "either", - "ethnum", - "foreign_vec", - "getrandom", - "hash_hasher", - "num-traits", - "rustc_version", - "simdutf8", + "hermit-abi", + "libc", + "winapi", ] [[package]] -name = "ash" -version = "0.37.3+1.3.251" +name = "autocfg" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e9c3835d686b0a6084ab4234fcd1b07dbf6e4767dce60874b12356a25ecd4a" -dependencies = [ - "libloading 0.7.4", -] +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] -name = "ashpd" -version = "0.6.8" +name = "base64" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac22eda5891cc086690cb6fa10121c0390de0e3b04eb269f2d766b00d3f2d81" -dependencies = [ 
- "async-fs 2.1.2", - "async-net", - "enumflags2", - "futures-channel", - "futures-util", - "once_cell", - "rand", - "serde", - "serde_repr", - "url", - "zbus", -] +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] -name = "async-broadcast" -version = "0.5.1" +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c48ccdbf6ca6b121e0f586cbc0e73ae440e56c67c30fa0873b4e110d9c26d2b" -dependencies = [ - "event-listener 2.5.3", - "futures-core", -] +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "async-channel" -version = "2.3.1" +name = "bitflags" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a" -dependencies = [ - "concurrent-queue", - "event-listener-strategy 0.5.2", - "futures-core", - "pin-project-lite", -] +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] -name = "async-executor" -version = "1.11.0" +name = "brotli" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b10202063978b3351199d68f8b22c4e47e4b1b822f8d43fd862d5ea8c006b29a" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" dependencies = [ - "async-task", - "concurrent-queue", - "fastrand 2.1.0", - "futures-lite 2.3.0", - "slab", + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", ] [[package]] -name = "async-fs" -version = "1.6.0" +name = "brotli-decompressor" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "279cf904654eeebfa37ac9bb1598880884924aab82e290aa65c9e77a0e142e06" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" dependencies = [ - "async-lock 2.8.0", - "autocfg", - "blocking", - "futures-lite 1.13.0", + "alloc-no-stdlib", + 
"alloc-stdlib", ] [[package]] -name = "async-fs" -version = "2.1.2" +name = "bumpalo" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcd09b382f40fcd159c2d695175b2ae620ffa5f3bd6f664131efff4e8b9e04a" -dependencies = [ - "async-lock 3.3.0", - "blocking", - "futures-lite 2.3.0", -] +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] -name = "async-io" -version = "1.13.0" +name = "bytemuck" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" -dependencies = [ - "async-lock 2.8.0", - "autocfg", - "cfg-if", - "concurrent-queue", - "futures-lite 1.13.0", - "log", - "parking", - "polling 2.8.0", - "rustix 0.37.27", - "slab", - "socket2 0.4.10", - "waker-fn", -] +checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" [[package]] -name = "async-io" -version = "2.3.2" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcccb0f599cfa2f8ace422d3555572f47424da5648a4382a9dd0310ff8210884" -dependencies = [ - "async-lock 3.3.0", - "cfg-if", - "concurrent-queue", - "futures-io", - "futures-lite 2.3.0", - "parking", - "polling 3.7.0", - "rustix 0.38.34", - "slab", - "tracing", - "windows-sys 0.52.0", -] +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "async-lock" -version = "2.8.0" +name = "bytes" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" -dependencies = [ - "event-listener 2.5.3", -] +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] -name = "async-lock" -version = "3.3.0" +name = "cc" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d034b430882f8381900d3fe6f0aaa3ad94f2cb4ac519b429692a1bc2dda4ae7b" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" dependencies = [ - "event-listener 4.0.3", - "event-listener-strategy 0.4.0", - "pin-project-lite", + "jobserver", + "libc", + "once_cell", ] [[package]] -name = "async-net" -version = "2.0.0" +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b948000fad4873c1c9339d60f2623323a0cfd3816e5181033c6a5cb68b2accf7" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ - "async-io 2.3.2", - "blocking", - "futures-lite 2.3.0", + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", ] [[package]] -name = "async-once-cell" -version = "0.5.3" +name = "clap" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +dependencies = [ + "clap_builder", + "clap_derive", +] [[package]] -name = "async-process" -version = "1.8.1" +name = "clap_builder" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea6438ba0a08d81529c69b36700fa2f95837bfe3e776ab39cde9c14d9149da88" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" dependencies = [ - "async-io 1.13.0", - "async-lock 2.8.0", - "async-signal", - "blocking", - "cfg-if", - "event-listener 3.1.0", - "futures-lite 1.13.0", - "rustix 0.38.34", - "windows-sys 0.48.0", + "anstream", + "anstyle", + "clap_lex", + "strsim", ] [[package]] -name = "async-recursion" -version = "1.1.1" +name = "clap_derive" +version = "4.5.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ + "heck", "proc-macro2", "quote", "syn 2.0.66", ] [[package]] -name = "async-signal" -version = "0.2.6" +name = "clap_lex" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afe66191c335039c7bb78f99dc7520b0cbb166b3a1cb33a03f53d8a1c6f2afda" -dependencies = [ - "async-io 2.3.2", - "async-lock 3.3.0", - "atomic-waker", - "cfg-if", - "futures-core", - "futures-io", - "rustix 0.38.34", - "signal-hook-registry", - "slab", - "windows-sys 0.52.0", -] +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] -name = "async-task" -version = "4.7.1" +name = "colorchoice" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" [[package]] -name = "async-trait" -version = "0.1.80" +name = "console" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys", ] [[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - -[[package]] -name = "atspi" -version = "0.19.0" +name = "const-random" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6059f350ab6f593ea00727b334265c4dfc7fd442ee32d264794bd9bdc68e87ca" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" dependencies = [ - "atspi-common", - "atspi-connection", - "atspi-proxies", + "const-random-macro", ] [[package]] -name = "atspi-common" -version = "0.3.0" +name = "const-random-macro" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92af95f966d2431f962bc632c2e68eda7777330158bf640c4af4249349b2cdf5" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "enumflags2", - "serde", - "static_assertions", - "zbus", - "zbus_names", - "zvariant", + "getrandom", + "once_cell", + "tiny-keccak", ] [[package]] -name = "atspi-connection" -version = "0.3.0" +name = "core-foundation-sys" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c65e7d70f86d4c0e3b2d585d9bf3f979f0b19d635a336725a88d279f76b939" -dependencies = [ - "atspi-common", - "atspi-proxies", - "futures-lite 1.13.0", - "zbus", -] +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] -name = "atspi-proxies" -version = "0.3.0" +name = "crc32fast" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6495661273703e7a229356dcbe8c8f38223d697aacfaf0e13590a9ac9977bb52" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" dependencies = [ - "atspi-common", - "serde", - "zbus", + "cfg-if", ] [[package]] -name = "atty" -version = "0.2.14" +name = "crossbeam-deque" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "crossbeam-epoch", + "crossbeam-utils", ] [[package]] -name = "autocfg" -version 
= "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" - -[[package]] -name = "az" -version = "1.2.1" +name = "crossbeam-epoch" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] [[package]] -name = "backtrace" -version = "0.3.71" +name = "crossbeam-utils" +version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] -name = "base64" -version = "0.13.1" +name = "crunchy" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] -name = "base64" -version = "0.21.7" +name = "csv" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] [[package]] -name = "base64" -version = "0.22.1" +name = "csv-core" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] [[package]] -name = 
"bincode" -version = "1.3.3" +name = "dashmap" +version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ - "serde", + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", + "rayon", ] [[package]] -name = "bit-set" -version = "0.5.3" +name = "displaydoc" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +checksum = "adc2ab4d5a16117f9029e9a6b5e4e79f4c67f6519bc134210d4d4a04ba31f41b" dependencies = [ - "bit-vec", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] -name = "bit-vec" -version = "0.6.3" +name = "either" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] -name = "bitflags" -version = "1.3.2" +name = "encode_unicode" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] -name = "bitflags" -version = "2.5.0" +name = "env_logger" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" dependencies = [ - "bytemuck", - "serde", + "atty", + "humantime", + "log", + "regex", + "termcolor", ] [[package]] -name = "block" -version = "0.1.6" +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] -name = "block-buffer" -version = "0.10.4" +name = "fallible-iterator" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] -name = "block-sys" -version = "0.1.0-beta.1" +name = "fallible-streaming-iterator" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa55741ee90902547802152aaf3f8e5248aab7e21468089560d4c8840561146" -dependencies = [ - "objc-sys 0.2.0-beta.2", -] +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] -name = "block2" -version = "0.2.0-alpha.6" +name = "flatbuffers" +version = "23.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dd9e63c1744f755c2f60332b88de39d341e5e86239014ad839bd71c106dec42" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" dependencies = [ - "block-sys", - "objc2-encode 2.0.0-pre.2", + "bitflags 1.3.2", + "rustc_version", ] [[package]] -name = "block2" -version = "0.5.1" +name = "flate2" +version = "1.0.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c132eebf10f5cad5289222520a4a058514204aed6d791f1cf4fe8088b82d15f" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" dependencies = [ - "objc2 0.5.2", + "crc32fast", + "miniz_oxide", ] [[package]] -name = "blocking" -version = "1.6.0" +name = "fnv" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "495f7104e962b7356f0aeb34247aca1fe7d2e783b346582db7f2904cb5717e88" -dependencies = [ - "async-channel", - "async-lock 3.3.0", - 
"async-task", - "futures-io", - "futures-lite 2.3.0", - "piper", -] +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "brotli" -version = "3.5.0" +name = "getrandom" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", + "cfg-if", + "libc", + "wasi", ] [[package]] -name = "brotli-decompressor" -version = "2.5.1" +name = "half" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "cfg-if", + "crunchy", + "num-traits", ] [[package]] -name = "bstr" -version = "0.2.17" +name = "hashbrown" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "lazy_static", - "memchr", - "regex-automata 0.1.10", + "ahash", + "allocator-api2", ] [[package]] -name = "bumpalo" -version = "3.16.0" +name = "hashlink" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] [[package]] -name = "bytecount" -version = "0.6.8" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" +checksum = 
"2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] -name = "bytemuck" -version = "1.16.0" +name = "hermit-abi" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ - "bytemuck_derive", + "libc", ] [[package]] -name = "bytemuck_derive" -version = "1.6.1" +name = "humantime" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "369cfaf2a5bed5d8f8202073b2e093c9f508251de1551a0deb4253e4c7d80909" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", + "quick-error", ] [[package]] -name = "byteorder" -version = "1.5.0" +name = "iana-time-zone" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] [[package]] -name = "bytes" -version = "1.6.0" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] [[package]] -name = "calloop" -version = "0.10.6" +name = "indexmap" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e0d00eb1ea24371a97d2da6201c6747a633dc6dc1988ef503403b4c59504a8" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ - "bitflags 1.3.2", - 
"log", - "nix 0.25.1", - "slotmap", - "thiserror", - "vec_map", + "equivalent", + "hashbrown", ] [[package]] -name = "camino" -version = "1.1.7" +name = "indicatif" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ec6b951b160caa93cc0c7b209e5a3bff7aae9062213451ac99493cd844c239" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" dependencies = [ - "serde", + "console", + "instant", + "number_prefix", + "portable-atomic", + "rayon", + "unicode-width", ] [[package]] -name = "cargo-platform" -version = "0.1.8" +name = "instant" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ - "serde", + "cfg-if", ] [[package]] -name = "cargo_metadata" -version = "0.14.2" +name = "integer-encoding" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4acbb09d9ee8e23699b9634375c72795d095bf268439da88562cf9b501f181fa" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ionmesh" +version = "0.2.0" dependencies = [ - "camino", - "cargo-platform", - "semver", + "clap", + "csv", + "indicatif", + "log", + "num", + "num-traits", + "pretty_env_logger", + "rand", + "rayon", + "rusqlite", + "sage-core", "serde", "serde_json", + "timsrust", + "toml", ] [[package]] -name = "cargo_metadata" -version = "0.18.1" +name = "is_terminal_polyfill" +version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" -dependencies = [ - "camino", - "cargo-platform", - "semver", - "serde", - "serde_json", - "thiserror", -] +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" [[package]] -name = "cc" -version = 
"1.0.98" +name = "itertools" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" -dependencies = [ - "jobserver", - "libc", - "once_cell", -] - -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - -[[package]] -name = "cfb" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" -dependencies = [ - "byteorder", - "fnv", - "uuid", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "cfg_aliases" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" - -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-targets 0.52.5", -] - -[[package]] -name = "clang-format" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "696283b40e1a39d208ee614b92e5f6521d16962edeb47c48372585ec92419943" -dependencies = [ - "thiserror", -] - -[[package]] -name = "clap" -version = "4.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_derive" -version = "4.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "clap_lex" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" - -[[package]] -name = "clean-path" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaa6b4b263a5d737e9bf6b7c09b72c41a5480aec4d7219af827f6564e950b6a5" - -[[package]] -name = "clipboard-win" -version = "5.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79f4473f5144e20d9aceaf2972478f06ddf687831eafeeb434fbaf0acc4144ad" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ - "error-code", + "either", ] [[package]] -name = "cocoa" -version = "0.24.1" +name = "itoa" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f425db7937052c684daec3bd6375c8abe2d146dca4b8b143d6db777c39138f3a" -dependencies = [ - "bitflags 1.3.2", - "block", - "cocoa-foundation", - "core-foundation", - "core-graphics 0.22.3", - "foreign-types 0.3.2", - "libc", - "objc", -] +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] -name = "cocoa-foundation" -version = "0.1.2" +name = "jobserver" +version = "0.1.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c6234cbb2e4c785b456c0644748b1ac416dd045799740356f8363dfe00c93f7" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" dependencies = [ - "bitflags 1.3.2", - "block", - "core-foundation", - "core-graphics-types", "libc", - "objc", ] [[package]] -name = "codespan-reporting" -version = "0.11.1" +name = "js-sys" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ - "termcolor", - "unicode-width", + "wasm-bindgen", ] [[package]] -name = "color_quant" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" - -[[package]] -name = "colorchoice" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" - -[[package]] -name = "com-rs" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf43edc576402991846b093a7ca18a3477e0ef9c588cde84964b5d3e43016642" - -[[package]] -name = "combine" -version = "4.6.7" +name = "lazy_static" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" -dependencies = [ - "bytes", - "memchr", -] +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] -name = "comfy-table" -version = "6.2.0" +name = "lexical-core" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e959d788268e3bf9d35ace83e81b124190378e4c91c9067524675e33394b8ba" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" 
dependencies = [ - "strum 0.24.1", - "strum_macros 0.24.3", - "unicode-width", + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", ] [[package]] -name = "concurrent-queue" -version = "2.5.0" +name = "lexical-parse-float" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "crossbeam-utils", + "lexical-parse-integer", + "lexical-util", + "static_assertions", ] [[package]] -name = "console" -version = "0.15.8" +name = "lexical-parse-integer" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "encode_unicode", - "lazy_static", - "libc", - "unicode-width", - "windows-sys 0.52.0", + "lexical-util", + "static_assertions", ] [[package]] -name = "const-random" -version = "0.1.18" +name = "lexical-util" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" dependencies = [ - "const-random-macro", + "static_assertions", ] [[package]] -name = "const-random-macro" -version = "0.1.16" +name = "lexical-write-float" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", + "lexical-util", + "lexical-write-integer", + "static_assertions", ] [[package]] -name = 
"convert_case" -version = "0.6.0" +name = "lexical-write-integer" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "unicode-segmentation", + "lexical-util", + "static_assertions", ] [[package]] -name = "core-foundation" -version = "0.9.4" +name = "libc" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] -name = "core-foundation-sys" -version = "0.8.6" +name = "libm" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] -name = "core-graphics" -version = "0.22.3" +name = "libsqlite3-sys" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2581bbab3b8ffc6fcbd550bf46c355135d16e9ff2a6ea032ad6b9bf1d7efe4fb" +checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "core-graphics-types", - "foreign-types 0.3.2", - "libc", + "cc", + "pkg-config", + "vcpkg", ] [[package]] -name = "core-graphics" -version = "0.23.2" +name = "linreg" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" +checksum = "f8c6fd2f0b9338b6d298dbcee4b4ec2a6ec2e88dc0a5f6f92d5cecd65afdf445" dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "core-graphics-types", - "foreign-types 0.5.0", - "libc", + 
"displaydoc", + "num-traits", ] [[package]] -name = "core-graphics-types" -version = "0.1.3" +name = "lock_api" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "libc", + "autocfg", + "scopeguard", ] [[package]] -name = "core2" -version = "0.4.0" +name = "log" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" -dependencies = [ - "memchr", -] +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] -name = "cpufeatures" -version = "0.2.12" +name = "lz4" +version = "1.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" dependencies = [ "libc", + "lz4-sys", ] [[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.13" +name = "lz4-sys" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = 
"57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" dependencies = [ - "crossbeam-utils", + "cc", + "libc", ] [[package]] -name = "crossbeam-deque" -version = "0.8.5" +name = "memchr" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-queue" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] - -[[package]] -name = "d3d12" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16e44ab292b1dddfdaf7be62cfd8877df52f2f3fde5858d95bab606be259f20" -dependencies = [ - "bitflags 2.5.0", - "libloading 0.8.3", - "winapi", -] - -[[package]] -name = "darling" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "darling_macro" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" -dependencies = [ - "darling_core", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "dary_heap" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" - -[[package]] -name = "dashmap" -version = "5.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" -dependencies = [ - "cfg-if", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", - "rayon", -] - -[[package]] -name = "data-encoding" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" - -[[package]] -name = "deranged" -version = "0.3.11" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", - "serde", -] - -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "directories-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339ee130d97a610ea5a5872d2bbb130fdf68884ff09d3028b81bec8a1ac23bbc" -dependencies = [ - "cfg-if", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "dispatch" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd0c93bb4b0c6d9b77f4435b0ae98c24d17f1c45b2ff844c6151a07256ca923b" - -[[package]] -name = "displaydoc" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc2ab4d5a16117f9029e9a6b5e4e79f4c67f6519bc134210d4d4a04ba31f41b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "dlib" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "330c60081dcc4c72131f8eb70510f1ac07223e5d4163db481a04a0befcffa412" -dependencies = [ - "libloading 0.8.3", -] - -[[package]] -name = "document-features" -version = "0.2.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5282ad69563b5fc40319526ba27e0e7363d552a896f0297d54f767717f9b95" -dependencies = [ - "litrs", -] - -[[package]] -name = "downcast-rs" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" - -[[package]] -name = "dyn-clone" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" - -[[package]] -name = "ecolor" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b7637fc2e74d17e52931bac90ff4fc061ac776ada9c7fa272f24cdca5991972" -dependencies = [ - "bytemuck", - "serde", -] - -[[package]] -name = "eframe" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd73918a828c35a7efb4d7188ea973df4bffc589178ed95f521c917b03ddcfa" -dependencies = [ - "bytemuck", - "cocoa", - "directories-next", - "egui", - "egui-wgpu", - "egui-winit", - "egui_glow", - "image 0.24.9", - "js-sys", - "log", - "objc", - "parking_lot", - "percent-encoding", - "pollster", - "puffin 0.18.1", - "raw-window-handle", - "ron", - "serde", - "static_assertions", - "thiserror", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "wgpu", - "winapi", - "winit", -] - -[[package]] -name = "egui" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55bcb864b764eb889515a38b8924757657a250738ad15126637ee2df291ee6b" -dependencies = [ - "accesskit", - "ahash", - "backtrace", - "epaint", - "log", - "nohash-hasher", - "puffin 0.18.1", - "ron", - "serde", -] - -[[package]] -name = "egui-wgpu" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d8ea73b329649be625fac2c9b190a2a8f9a66f98610c4b09124b596c6695053" -dependencies = [ - "bytemuck", - 
"egui", - "epaint", - "log", - "puffin 0.18.1", - "thiserror", - "type-map", - "wgpu", - "winit", -] - -[[package]] -name = "egui-winit" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b673606b6606b12b95e3a3194d7882bf5cff302db36a520b8144c7c342e4e84" -dependencies = [ - "accesskit_winit", - "arboard", - "egui", - "log", - "puffin 0.18.1", - "raw-window-handle", - "serde", - "smithay-clipboard", - "web-time", - "webbrowser", - "winit", -] - -[[package]] -name = "egui_commonmark" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2ebe7b74aef576cc2fa4e2553ed0c15599fa3fe5966d2277355d3c08a53114" -dependencies = [ - "egui", - "egui_extras", - "pulldown-cmark", -] - -[[package]] -name = "egui_extras" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97624eaf17a16058265d3a3e712e167798655baf7c8f693de25be75cdd6c57b5" -dependencies = [ - "egui", - "ehttp", - "enum-map", - "image 0.24.9", - "log", - "mime_guess2", - "puffin 0.18.1", - "serde", -] - -[[package]] -name = "egui_glow" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "262151f9d57c557c02a40a46f27b9e050a6eb0b006b94dced9c6f4519a04d489" -dependencies = [ - "bytemuck", - "egui", - "egui-winit", - "glow 0.12.3", - "log", - "memoffset 0.7.1", - "puffin 0.18.1", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "egui_plot" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29b484821a5b336af40a34151a940f52bef0f1ab56ec0d8cf80f74783eaae412" -dependencies = [ - "egui", -] - -[[package]] -name = "egui_tiles" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5826eeca11851ff6fbfecfabdf1abcda96d40562b5b50ad877a9a0b43126b40a" -dependencies = [ - "ahash", - "egui", - "itertools 0.12.1", - "log", - "serde", -] - -[[package]] -name = 
"ehttp" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f88f45662356f96afc7d9e2bc9910ad8352ee01417f7c69b8b16a53c8767a75d" -dependencies = [ - "document-features", - "futures-util", - "js-sys", - "ureq", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", -] - -[[package]] -name = "either" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" - -[[package]] -name = "emath" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a045c6c0b44b35e98513fc1e9d183ab42881ac27caccb9fa345465601f56cce4" -dependencies = [ - "bytemuck", - "serde", -] - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - -[[package]] -name = "enum-map" -version = "2.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6866f3bfdf8207509a033af1a75a7b08abda06bbaaeae6669323fd5a097df2e9" -dependencies = [ - "enum-map-derive", - "serde", -] - -[[package]] -name = "enum-map-derive" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "enumflags2" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3278c9d5fb675e0a51dabcf4c0d355f692b064171535ba72361be1528a9d8e8d" -dependencies = [ - "enumflags2_derive", - "serde", -] - -[[package]] -name = "enumflags2_derive" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c785274071b1b420972453b306eeca06acf4633829db4223b58a2a8c5953bc4" -dependencies = [ - "proc-macro2", - 
"quote", - "syn 2.0.66", -] - -[[package]] -name = "enumn" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fd000fd6988e73bbe993ea3db9b1aa64906ab88766d654973924340c8cddb42" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "enumset" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226c0da7462c13fb57e5cc9e0dc8f0635e7d27f276a3a7fd30054647f669007d" -dependencies = [ - "enumset_derive", -] - -[[package]] -name = "enumset_derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08b6c6ab82d70f08844964ba10c7babb716de2ecaeab9be5717918a5177d3af" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "env_logger" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" -dependencies = [ - "atty", - "humantime 1.3.0", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime 2.1.0", - "is-terminal", - "log", - "termcolor", -] - -[[package]] -name = "epaint" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1b9e000d21bab9b535ce78f9f7745be28b3f777f6c7223936561c5c7fefab8" -dependencies = [ - "ab_glyph", - "ahash", - "bytemuck", - "ecolor", - "emath", - "log", - "nohash-hasher", - "parking_lot", - "serde", -] - -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - -[[package]] -name = "errno" -version = "0.3.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "error-chain" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d2f06b9cac1506ece98fe3231e3cc9c4410ec3d5b1f24ae1c8946f0742cdefc" -dependencies = [ - "version_check", -] - -[[package]] -name = "error-code" -version = "3.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0474425d51df81997e2f90a21591180b38eccf27292d755f3e30750225c175b" - -[[package]] -name = "ethnum" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" - -[[package]] -name = "event-listener" -version = "2.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" - -[[package]] -name = "event-listener" -version = "3.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d93877bcde0eb80ca09131a08d23f0a5c18a620b01db137dba666d18cd9b30c2" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener" -version = "4.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b215c49b2b248c855fb73579eb1f4f26c38ffdc12973e20e07b91d78d5646e" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d9944b8ca13534cdfb2800775f8dd4902ff3fc75a50101466decadfdf322a24" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3" -dependencies = [ - "event-listener 4.0.3", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" -dependencies = [ - "event-listener 5.3.0", - "pin-project-lite", -] - -[[package]] -name = "ewebsock" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d031a4d5ce7817b9146a559be40bd4218363ead4e1b6f948ec9086cce96dde" -dependencies = [ - "document-features", - "js-sys", - "log", - "tungstenite", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - -[[package]] -name = "fastrand" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] - -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" - -[[package]] -name = "fdeflate" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" -dependencies = [ - "simd-adler32", -] - -[[package]] -name = "filetime" -version = "0.2.23" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.4.1", - "windows-sys 0.52.0", -] - -[[package]] -name = "fixed" -version = "1.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc715d38bea7b5bf487fcd79bcf8c209f0b58014f3018a7a19c2b855f472048" -dependencies = [ - "az", - "bytemuck", - "half", - "serde", - "typenum", -] - -[[package]] -name = "flatbuffers" -version = "23.5.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" -dependencies = [ - "bitflags 1.3.2", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "flume" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" -dependencies = [ - "futures-core", - "futures-sink", - "nanorand", - "spin", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared 0.1.1", -] - -[[package]] -name = "foreign-types" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" -dependencies = [ - "foreign-types-macros", - 
"foreign-types-shared 0.3.1", -] - -[[package]] -name = "foreign-types-macros" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - -[[package]] -name = "foreign-types-shared" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" - -[[package]] -name = "foreign_vec" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "fsevent-sys" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" -dependencies = [ - "libc", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-lite" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" -dependencies = [ - "fastrand 1.9.0", - "futures-core", - "futures-io", - "memchr", - "parking", - "pin-project-lite", - "waker-fn", -] - -[[package]] -name = "futures-lite" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5" -dependencies = [ - "fastrand 2.1.0", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - -[[package]] -name = "futures-macro" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", 
- "version_check", -] - -[[package]] -name = "gethostname" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" -dependencies = [ - "libc", - "windows-targets 0.48.5", -] - -[[package]] -name = "getrandom" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" -dependencies = [ - "cfg-if", - "js-sys", - "libc", - "wasi", - "wasm-bindgen", -] - -[[package]] -name = "gimli" -version = "0.26.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" -dependencies = [ - "fallible-iterator", - "indexmap 1.9.3", - "stable_deref_trait", -] - -[[package]] -name = "gimli" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" - -[[package]] -name = "gl_generator" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" -dependencies = [ - "khronos_api", - "log", - "xml-rs", -] - -[[package]] -name = "glam" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12f597d56c1bd55a811a1be189459e8fad2bbc272616375602443bdfb37fa774" -dependencies = [ - "bytemuck", - "serde", -] - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "glow" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0fe580e4b60a8ab24a868bc08e2f03cbcb20d3d676601fa909386713333728" -dependencies = [ - "js-sys", - "slotmap", - "wasm-bindgen", - 
"web-sys", -] - -[[package]] -name = "glow" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" -dependencies = [ - "js-sys", - "slotmap", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "gltf" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ce1918195723ce6ac74e80542c5a96a40c2b26162c1957a5cd70799b8cacf7" -dependencies = [ - "base64 0.13.1", - "byteorder", - "gltf-json", - "image 0.25.1", - "lazy_static", - "serde_json", - "urlencoding", -] - -[[package]] -name = "gltf-derive" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14070e711538afba5d6c807edb74bcb84e5dbb9211a3bf5dea0dfab5b24f4c51" -dependencies = [ - "inflections", - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "gltf-json" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6176f9d60a7eab0a877e8e96548605dedbde9190a7ae1e80bbcc1c9af03ab14" -dependencies = [ - "gltf-derive", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "glutin_wgl_sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8098adac955faa2d31079b65dc48841251f69efd3ac25477903fc424362ead" -dependencies = [ - "gl_generator", -] - -[[package]] -name = "gpu-alloc" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" -dependencies = [ - "bitflags 2.5.0", - "gpu-alloc-types", -] - -[[package]] -name = "gpu-alloc-types" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" -dependencies = [ - "bitflags 2.5.0", -] - -[[package]] -name = "gpu-allocator" -version = "0.23.0" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40fe17c8a05d60c38c0a4e5a3c802f2f1ceb66b76c67d96ffb34bef0475a7fad" -dependencies = [ - "backtrace", - "log", - "presser", - "thiserror", - "winapi", - "windows 0.51.1", -] - -[[package]] -name = "gpu-descriptor" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc11df1ace8e7e564511f53af41f3e42ddc95b56fd07b3f4445d2a6048bc682c" -dependencies = [ - "bitflags 2.5.0", - "gpu-descriptor-types", - "hashbrown 0.14.5", -] - -[[package]] -name = "gpu-descriptor-types" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf0b36e6f090b7e1d8a4b49c0cb81c1f8376f72198c65dd3ad9ff3556b8b78c" -dependencies = [ - "bitflags 2.5.0", -] - -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.2.6", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "half" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" -dependencies = [ - "bytemuck", - "cfg-if", - "crunchy", - "num-traits", -] - -[[package]] -name = "hash_hasher" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "hashlink" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" -dependencies = [ - "hashbrown 0.14.5", -] - -[[package]] -name = "hassle-rs" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1397650ee315e8891a0df210707f0fc61771b0cc518c3023896064c5407cb3b0" -dependencies = [ - "bitflags 1.3.2", - "com-rs", - "libc", - "libloading 0.7.4", - "thiserror", - "widestring", - "winapi", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hexf-parse" -version = "0.2.1" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" - -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" - -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "humantime" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = 
"hyper" -version = "0.14.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http 0.2.12", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.5.7", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "iana-time-zone" -version = "0.1.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core 0.52.0", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "id-arena" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "image" -version = "0.24.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" -dependencies = [ - "bytemuck", - "byteorder", - "color_quant", - "num-traits", - "png", -] - -[[package]] 
-name = "image" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" -dependencies = [ - "bytemuck", - "byteorder", - "num-traits", - "png", - "tiff", - "zune-core", - "zune-jpeg", -] - -[[package]] -name = "indent" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9f1a0777d972970f204fdf8ef319f1f4f8459131636d7e3c96c5d59570d0fa6" - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" -dependencies = [ - "equivalent", - "hashbrown 0.14.5", -] - -[[package]] -name = "indicatif" -version = "0.17.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" -dependencies = [ - "console", - "instant", - "number_prefix", - "portable-atomic", - "rayon", - "unicode-width", -] - -[[package]] -name = "infer" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb33622da908807a06f9513c19b3c1ad50fab3e4137d82a78107d502075aa199" -dependencies = [ - "cfb", -] - -[[package]] -name = "inflections" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a257582fdcde896fd96463bf2d40eefea0580021c0712a0e2b028b60b47a837a" - -[[package]] -name = "inotify" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" -dependencies = [ - "bitflags 1.3.2", - "inotify-sys", 
- "libc", -] - -[[package]] -name = "inotify-sys" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" -dependencies = [ - "libc", -] - -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi 0.3.9", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "ionmesh" -version = "0.1.0" -dependencies = [ - "apache-avro", - "clap", - "csv", - "indicatif", - "log", - "num", - "num-traits", - "pretty_env_logger", - "rand", - "rayon", - "rerun", - "rusqlite", - "sage-core", - "serde", - "serde_json", - "timsrust", - "toml", -] - -[[package]] -name = "is-terminal" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" -dependencies = [ - "hermit-abi 0.3.9", - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "is_terminal_polyfill" -version = "1.70.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 
-dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys", - "log", - "thiserror", - "walkdir", - "windows-sys 0.45.0", -] - -[[package]] -name = "jni-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" - -[[package]] -name = "jobserver" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" -dependencies = [ - "libc", -] - -[[package]] -name = "jpeg-decoder" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" - -[[package]] -name = "js-sys" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "khronos-egl" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" -dependencies = [ - "libc", - "libloading 0.8.3", - "pkg-config", -] - -[[package]] -name = "khronos_api" -version = "3.1.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" - -[[package]] -name = "kqueue" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c" -dependencies = [ - "kqueue-sys", - "libc", -] - -[[package]] -name = "kqueue-sys" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" -dependencies = [ - "bitflags 1.3.2", - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "leb128" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" - -[[package]] -name = "lexical-core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.5" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "libflate" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" -dependencies = [ - "adler32", - "core2", - "crc32fast", - "dary_heap", - "libflate_lz77", -] - -[[package]] -name = "libflate_lz77" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" -dependencies = [ - "core2", - "hashbrown 0.14.5", - "rle-decode-fast", -] - -[[package]] -name = "libloading" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] - -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - 
"cfg-if", - "windows-targets 0.52.5", -] - -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - -[[package]] -name = "libredox" -version = "0.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3af92c55d7d839293953fcd0fda5ecfe93297cfde6ffbdec13b41d99c0ba6607" -dependencies = [ - "bitflags 2.5.0", - "libc", - "redox_syscall 0.4.1", -] - -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags 2.5.0", - "libc", -] - -[[package]] -name = "libsqlite3-sys" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" - -[[package]] -name = "linreg" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c6fd2f0b9338b6d298dbcee4b4ec2a6ec2e88dc0a5f6f92d5cecd65afdf445" -dependencies = [ - "displaydoc", - "num-traits", -] - -[[package]] -name = "linux-raw-sys" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" - -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - -[[package]] -name = "litrs" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" - -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", - "serde", -] - -[[package]] -name = "log" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" - -[[package]] -name = "log-once" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d8a05e3879b317b1b6dbf353e5bba7062bedcc59815267bb23eaa0c576cebf0" -dependencies = [ - "log", -] - -[[package]] -name = "lz4" -version = "1.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" -dependencies = [ - "libc", - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "lz4_flex" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "macaw" -version = "0.18.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fdbfdf07a7e53090afb7fd427eb0a4b46fc51cb484b2deba27b47919762dfb" -dependencies = [ - "glam", - "num-traits", - "serde", -] - -[[package]] -name = "malloc_buf" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" -dependencies = [ - "libc", -] - -[[package]] -name = "matrixmultiply" 
-version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" - -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - -[[package]] -name = "memmap2" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" -dependencies = [ - "libc", -] - -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - -[[package]] -name = "memoffset" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" -dependencies = [ - "autocfg", -] - -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "memory-stats" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34f79cf9964c5c9545493acda1263f1912f8d2c56c8a2ffee2606cb960acaacc" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "metal" -version = "0.27.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25" -dependencies = [ - "bitflags 2.5.0", - "block", - "core-graphics-types", - "foreign-types 0.5.0", - "log", - "objc", - "paste", -] - -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "mime_guess2" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a3333bb1609500601edc766a39b4c1772874a4ce26022f4d866854dc020c41" -dependencies = [ - "mime", - "unicase", -] - -[[package]] -name = "miniz_oxide" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" -dependencies = [ - "adler", - "simd-adler32", -] - -[[package]] -name = "mio" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" -dependencies = [ - "libc", - "log", - "wasi", - "windows-sys 0.48.0", -] - -[[package]] -name = "naga" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae585df4b6514cf8842ac0f1ab4992edc975892704835b549cf818dc0191249e" -dependencies = [ - "bit-set", - "bitflags 2.5.0", - "codespan-reporting", - "hexf-parse", - "indexmap 2.2.6", - "log", - "num-traits", - "rustc-hash", - "spirv", - "termcolor", - "thiserror", - "unicode-xid", -] - -[[package]] -name = "nanorand" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" -dependencies = [ - "getrandom", -] - -[[package]] -name = "natord" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c" - 
-[[package]] -name = "ndarray" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "rawpointer", -] - -[[package]] -name = "ndk" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "451422b7e4718271c8b5b3aadf5adedba43dc76312454b387e98fae0fc951aa0" -dependencies = [ - "bitflags 1.3.2", - "jni-sys", - "ndk-sys", - "num_enum 0.5.11", - "raw-window-handle", - "thiserror", -] - -[[package]] -name = "ndk-context" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" - -[[package]] -name = "ndk-sys" -version = "0.4.1+23.1.7779620" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cf2aae958bd232cac5069850591667ad422d263686d75b52a065f9badeee5a3" -dependencies = [ - "jni-sys", -] - -[[package]] -name = "never" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c96aba5aa877601bb3f6dd6a63a969e1f82e60646e81e71b14496995e9853c91" - -[[package]] -name = "nix" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" -dependencies = [ - "bitflags 1.3.2", - "cfg-if", - "libc", - "memoffset 0.6.5", -] - -[[package]] -name = "nix" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" -dependencies = [ - "autocfg", - "bitflags 1.3.2", - "cfg-if", - "libc", - "memoffset 0.6.5", -] - -[[package]] -name = "nix" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" -dependencies = [ - "bitflags 1.3.2", - "cfg-if", - "libc", - "memoffset 0.7.1", -] - -[[package]] -name = "nohash-hasher" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" - -[[package]] -name = "notify" -version = "6.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" -dependencies = [ - "bitflags 2.5.0", - "crossbeam-channel", - "filetime", - "fsevent-sys", - "inotify", - "kqueue", - "libc", - "log", - "mio", - "walkdir", - "windows-sys 0.48.0", -] - -[[package]] -name = "ntapi" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - -[[package]] -name = 
"num-derive" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - -[[package]] -name = "num_enum" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" -dependencies = [ - "num_enum_derive 0.5.11", -] - -[[package]] -name = "num_enum" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" -dependencies = [ - "num_enum_derive 
0.6.1", -] - -[[package]] -name = "num_enum_derive" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "num_enum_derive" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "num_threads" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" -dependencies = [ - "libc", -] - -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - -[[package]] -name = "objc" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" -dependencies = [ - "malloc_buf", - "objc_exception", -] - -[[package]] -name = "objc-foundation" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1add1b659e36c9607c7aab864a76c7a4c2760cd0cd2e120f3fb8b952c7e22bf9" -dependencies = [ - "block", - "objc", - "objc_id", -] - -[[package]] -name = "objc-sys" -version = "0.2.0-beta.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b9834c1e95694a05a828b59f55fa2afec6288359cda67146126b3f90a55d7" - -[[package]] -name = "objc-sys" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb91bdd390c7ce1a8607f35f3ca7151b65afc0ff5ff3b34fa350f7d7c7e4310" - -[[package]] -name = "objc2" -version = 
"0.3.0-beta.3.patch-leaks.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e01640f9f2cb1220bbe80325e179e532cb3379ebcd1bf2279d703c19fe3a468" -dependencies = [ - "block2 0.2.0-alpha.6", - "objc-sys 0.2.0-beta.2", - "objc2-encode 2.0.0-pre.2", -] - -[[package]] -name = "objc2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46a785d4eeff09c14c487497c162e92766fbb3e4059a71840cecc03d9a50b804" -dependencies = [ - "objc-sys 0.3.5", - "objc2-encode 4.0.3", -] - -[[package]] -name = "objc2-app-kit" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" -dependencies = [ - "bitflags 2.5.0", - "block2 0.5.1", - "libc", - "objc2 0.5.2", - "objc2-core-data", - "objc2-core-image", - "objc2-foundation", - "objc2-quartz-core", -] - -[[package]] -name = "objc2-core-data" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" -dependencies = [ - "bitflags 2.5.0", - "block2 0.5.1", - "objc2 0.5.2", - "objc2-foundation", -] - -[[package]] -name = "objc2-core-image" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55260963a527c99f1819c4f8e3b47fe04f9650694ef348ffd2227e8196d34c80" -dependencies = [ - "block2 0.5.1", - "objc2 0.5.2", - "objc2-foundation", - "objc2-metal", -] - -[[package]] -name = "objc2-encode" -version = "2.0.0-pre.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abfcac41015b00a120608fdaa6938c44cb983fee294351cc4bac7638b4e50512" -dependencies = [ - "objc-sys 0.2.0-beta.2", -] - -[[package]] -name = "objc2-encode" -version = "4.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7891e71393cd1f227313c9379a26a584ff3d7e6e7159e988851f0934c993f0f8" - -[[package]] -name = 
"objc2-foundation" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" -dependencies = [ - "bitflags 2.5.0", - "block2 0.5.1", - "libc", - "objc2 0.5.2", -] - -[[package]] -name = "objc2-metal" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" -dependencies = [ - "bitflags 2.5.0", - "block2 0.5.1", - "objc2 0.5.2", - "objc2-foundation", -] - -[[package]] -name = "objc2-quartz-core" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" -dependencies = [ - "bitflags 2.5.0", - "block2 0.5.1", - "objc2 0.5.2", - "objc2-foundation", - "objc2-metal", -] - -[[package]] -name = "objc_exception" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad970fb455818ad6cba4c122ad012fae53ae8b4795f86378bce65e4f6bab2ca4" -dependencies = [ - "cc", -] - -[[package]] -name = "objc_id" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c92d4ddb4bd7b50d730c215ff871754d0da6b2178849f8a2a2ab69712d0c073b" -dependencies = [ - "objc", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "orbclient" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f0d54bde9774d3a51dcf281a5def240c71996bc6ca05d2c847ec8b2b216166" -dependencies = [ 
- "libredox 0.0.2", -] - -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "4.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76df7075c7d4d01fdcb46c912dd17fba5b60c78ea480b475f2b6ab6f666584e" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-stream" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aa2b01e1d916879f73a53d01d1d6cee68adbb31d6d9177a8cfce093cced1d50" -dependencies = [ - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "owned_ttf_parser" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b41438d2fc63c46c74a2203bf5ccd82c41ba04347b2fcf5754f230b167067d5" -dependencies = [ - "ttf-parser", -] - -[[package]] -name = "parking" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" - -[[package]] -name = "parking_lot" -version = "0.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall 0.5.1", - "smallvec", - "windows-targets 0.52.5", -] - -[[package]] -name = "parquet" -version = "42.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" -dependencies = 
[ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.21.7", - "brotli", - "bytes", - "chrono", - "flate2", - "hashbrown 0.14.5", - "lz4", - "num", - "num-bigint", - "paste", - "seq-macro", - "snap", - "thrift", - "twox-hash", - "zstd", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pathdiff" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" - -[[package]] -name = "peg" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f76678828272f177ac33b7e2ac2e3e73cc6c1cd1e3e387928aa69562fa51367" -dependencies = [ - "peg-macros", - "peg-runtime", -] - -[[package]] -name = "peg-macros" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "636d60acf97633e48d266d7415a9355d4389cea327a193f87df395d88cd2b14d" -dependencies = [ - "peg-runtime", - "proc-macro2", - "quote", -] - -[[package]] -name = "peg-runtime" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555b1514d2d99d78150d3c799d4c357a3e2c2a8062cd108e93a06d9057629c5" - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "piper" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464db0c665917b13ebb5d453ccdec4add5658ee1adc7affc7677615356a8afaf" -dependencies = [ - "atomic-waker", - "fastrand 2.1.0", - "futures-io", -] - -[[package]] -name = "pkg-config" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" - -[[package]] -name = "planus" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc1691dd09e82f428ce8d6310bd6d5da2557c82ff17694d2a32cad7242aea89f" -dependencies = [ - "array-init-cursor", -] - -[[package]] -name = "ply-rs" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbadf9cb4a79d516de4c64806fe64ffbd8161d1ac685d000be789fb628b88963" -dependencies = [ - "byteorder", - "linked-hash-map", - "peg", - "skeptic", -] - -[[package]] -name = "png" -version = "0.17.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" -dependencies = [ - "bitflags 1.3.2", - "crc32fast", - "fdeflate", - "flate2", - "miniz_oxide", -] - -[[package]] -name = "poll-promise" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a58fecbf9da8965bcdb20ce4fd29788d1acee68ddbb64f0ba1b81bccdb7df" -dependencies = [ - "document-features", - "static_assertions", - "wasm-bindgen", - "wasm-bindgen-futures", -] - -[[package]] -name = "polling" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" -dependencies = [ - "autocfg", - "bitflags 1.3.2", - "cfg-if", - "concurrent-queue", - "libc", - "log", - "pin-project-lite", - "windows-sys 
0.48.0", -] - -[[package]] -name = "polling" -version = "3.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645493cf344456ef24219d02a768cf1fb92ddf8c92161679ae3d91b91a637be3" -dependencies = [ - "cfg-if", - "concurrent-queue", - "hermit-abi 0.3.9", - "pin-project-lite", - "rustix 0.38.34", - "tracing", - "windows-sys 0.52.0", -] - -[[package]] -name = "pollster" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22686f4785f02a4fcc856d3b3bb19bf6c8160d103f7a99cc258bddd0251dc7f2" - -[[package]] -name = "portable-atomic" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "presser" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" - -[[package]] -name = "pretty_env_logger" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" -dependencies = [ - "env_logger 0.7.1", - "log", -] - -[[package]] -name = "prettyplease" -version = "0.2.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" -dependencies = [ - "proc-macro2", - "syn 2.0.66", -] - -[[package]] -name = "proc-macro-crate" -version = "1.3.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" -dependencies = [ - "once_cell", - "toml_edit 0.19.15", -] - -[[package]] -name = "proc-macro2" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "profiling" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" -dependencies = [ - "profiling-procmacros", - "puffin 0.19.0", -] - -[[package]] -name = "profiling-procmacros" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" -dependencies = [ - "quote", - "syn 2.0.66", -] - -[[package]] -name = "puffin" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02330f795caafc2007510f742624c10aa813b8c3097c77ff344b1b86eb6be846" -dependencies = [ - "anyhow", - "bincode", - "byteorder", - "cfg-if", - "lz4_flex", - "once_cell", - "parking_lot", - "serde", -] - -[[package]] -name = "puffin" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f76ad4bb049fded4e572df72cbb6381ff5d1f41f85c3a04b56e4eca287a02f" -dependencies = [ - "anyhow", - "byteorder", - "cfg-if", - "once_cell", - "parking_lot", -] - -[[package]] -name = "puffin_http" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf8e52cf00569807b02e8089a85e859c00476182730cda9718c94b12cdc31b8" -dependencies = [ - "anyhow", - "crossbeam-channel", - "log", - "puffin 0.18.1", -] - -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags 2.5.0", - "memchr", - "unicase", -] - -[[package]] -name = "quad-rand" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" - -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - -[[package]] -name = "quote" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "range-alloc" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8a99fddc9f0ba0a85884b8d14e3592853e787d581ca1816c91349b10e4eeab" - -[[package]] -name = "raw-window-handle" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "re_analytics" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8253d06d8a3401245e432d3a03bc8e68c3eec175f82b718eaf44c2b31d099dd" -dependencies = [ - "crossbeam", - "directories-next", - "ehttp", - "re_build_info", - "re_build_tools", - "re_log", - "serde", - "serde_json", - "sha2", - "thiserror", - "time", - "uuid", - "web-sys", -] - -[[package]] -name = "re_build_info" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e9587c2307e5daf15556af2f1a061a76b0b2553453090c2edf257a741763ae" - -[[package]] -name = "re_build_tools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04aee77d1d8342e8c2aec301218dac3a1572381274ca4d734ff44ffd0b69277" -dependencies = [ - "anyhow", - "cargo_metadata 0.18.1", - "glob", - "sha2", - "time", - "unindent", - "walkdir", -] - -[[package]] -name = "re_build_web_viewer" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052d56f31822e85246d2506c56131a35951e1bbb26f20bb2eaa090d7e2dcc7e8" -dependencies = [ - "anyhow", - "cargo_metadata 0.18.1", - "re_error", - "wasm-bindgen-cli-support", -] - -[[package]] -name = "re_crash_handler" -version = "0.12.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "36e39b5dd131896d6a6ac987097f529af4fcb38890c21f8629486d595bbdf04c" -dependencies = [ - "backtrace", - "itertools 0.12.1", - "libc", - "parking_lot", - "re_analytics", - "re_build_info", -] - -[[package]] -name = "re_data_source" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cf15c014f8f26a86be877681aaf0b020b6e2ce5d9e2783877df801f7809a7a" -dependencies = [ - "ahash", - "anyhow", - "image 0.24.9", - "itertools 0.12.1", - "once_cell", - "parking_lot", - "rayon", - "re_build_tools", - "re_log", - "re_log_encoding", - "re_log_types", - "re_smart_channel", - "re_tracing", - "re_types", - "re_ws_comms", - "thiserror", - "walkdir", -] - -[[package]] -name = "re_data_store" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f4e023296dfdc1a08cbfb5474520b7fa872461b5702ed1cc07889f787f9de2f" -dependencies = [ - "ahash", - "arrow2", - "document-features", - "indent", - "itertools 0.12.1", - "nohash-hasher", - "once_cell", - "parking_lot", - "re_error", - "re_format", - "re_log", - "re_log_types", - "re_tracing", - "re_types_core", - "smallvec", - "thiserror", - "web-time", -] - -[[package]] -name = "re_data_ui" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1d63e08cb68a762c9c3258f52c44075eeb609e8c03249f2a56d64da9efbc92" -dependencies = [ - "ahash", - "anyhow", - "bytemuck", - "egui", - "egui_extras", - "egui_plot", - "image 0.24.9", - "itertools 0.12.1", - "re_data_store", - "re_entity_db", - "re_error", - "re_format", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", - "rfd", -] - -[[package]] -name = "re_entity_db" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6b40d7aa3adac9f2dcca95f540a46f0e640c683f0c24ec406c0b4226226d4f8" 
-dependencies = [ - "ahash", - "document-features", - "egui_plot", - "emath", - "getrandom", - "itertools 0.12.1", - "nohash-hasher", - "parking_lot", - "re_data_store", - "re_format", - "re_int_histogram", - "re_log", - "re_log_types", - "re_smart_channel", - "re_tracing", - "re_types_core", - "rmp-serde", - "serde", - "thiserror", - "web-time", -] - -[[package]] -name = "re_error" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd976c442b745a4a0b30e72e65841dd569c135e645969388887ac06600c94db" - -[[package]] -name = "re_format" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f7253a068deca4b2f9f75ec8f2f1a31de7330dbf42f109423b5a258a939623" -dependencies = [ - "arrow2", - "comfy-table", - "re_tuid", - "re_types_core", -] - -[[package]] -name = "re_int_histogram" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07400ae41bb83679d428f81338575f3a1bbd61343d59a13f63f4da09bb085c04" -dependencies = [ - "smallvec", - "static_assertions", -] - -[[package]] -name = "re_log" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ff89b2c17420a6395510f2c2821a86d3164ed8d86d95334b44b707e0e36b36" -dependencies = [ - "env_logger 0.10.2", - "js-sys", - "log", - "log-once", - "parking_lot", - "tracing", - "wasm-bindgen", -] - -[[package]] -name = "re_log_encoding" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202027acf7a212e18ac1fcd1ff395a402e86bd8139fd1eee2887651e19283c65" -dependencies = [ - "ehttp", - "js-sys", - "lz4_flex", - "parking_lot", - "re_build_info", - "re_log", - "re_log_types", - "re_smart_channel", - "re_tracing", - "rmp-serde", - "thiserror", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "web-time", -] - -[[package]] -name = "re_log_types" -version = "0.12.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "324c8ac3ee7bdd58902474ebbc07d2cb30e4ac6ff8bf3d1280dceced5f7e7ee8" -dependencies = [ - "ahash", - "anyhow", - "arrow2", - "backtrace", - "bytemuck", - "clean-path", - "crossbeam", - "document-features", - "fixed", - "half", - "itertools 0.12.1", - "natord", - "nohash-hasher", - "num-derive", - "num-traits", - "re_format", - "re_log", - "re_string_interner", - "re_tracing", - "re_tuid", - "re_types_core", - "serde", - "serde_bytes", - "similar-asserts", - "smallvec", - "thiserror", - "time", - "typenum", - "uuid", - "web-time", -] - -[[package]] -name = "re_memory" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ca1c9e3b9ca11d8249670984f11b3e206654bea437f68645c201ffcc37cf3e2" -dependencies = [ - "ahash", - "backtrace", - "emath", - "itertools 0.12.1", - "memory-stats", - "nohash-hasher", - "once_cell", - "parking_lot", - "re_format", - "re_log", - "re_tracing", - "smallvec", - "sysinfo", - "wasm-bindgen", - "web-time", -] - -[[package]] -name = "re_query" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56dda98542f62ccf6475ab196d79036f6456552345998ec3943e8b5504e859a3" -dependencies = [ - "arrow2", - "backtrace", - "document-features", - "itertools 0.12.1", - "re_data_store", - "re_entity_db", - "re_format", - "re_log", - "re_log_types", - "re_tracing", - "re_types_core", - "thiserror", -] - -[[package]] -name = "re_renderer" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb60d0c172e364443021c72e2ca2638925a5b55ce2c0352df2b3f7ef6acaa474" -dependencies = [ - "ahash", - "anyhow", - "arrow2", - "bitflags 2.5.0", - "bytemuck", - "cfg-if", - "cfg_aliases 0.2.1", - "clean-path", - "crossbeam", - "document-features", - "ecolor", - "enumset", - "glam", - "gltf", - "half", - "itertools 0.12.1", - "macaw", - "never", - "notify", - "ordered-float 4.2.0", - 
"parking_lot", - "pathdiff", - "profiling", - "re_build_tools", - "re_error", - "re_log", - "re_tracing", - "serde", - "slotmap", - "smallvec", - "static_assertions", - "thiserror", - "tobj", - "type-map", - "walkdir", - "wasm-bindgen-futures", - "wgpu", - "wgpu-core", -] - -[[package]] -name = "re_sdk" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d8b167db3745c409953c476fb037c32ef72e309aff6df6a01818de170f92e7f" -dependencies = [ - "ahash", - "crossbeam", - "document-features", - "once_cell", - "parking_lot", - "re_build_info", - "re_build_tools", - "re_log", - "re_log_encoding", - "re_log_types", - "re_memory", - "re_sdk_comms", - "re_types_core", - "thiserror", -] - -[[package]] -name = "re_sdk_comms" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92cb6e9242d0cf27709be39814cd8431eab90dee27c79afcfb7877793551cef3" -dependencies = [ - "ahash", - "crossbeam", - "document-features", - "rand", - "re_log", - "re_log_encoding", - "re_log_types", - "re_smart_channel", - "thiserror", - "tokio", -] - -[[package]] -name = "re_smart_channel" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69db20bf69f784f26d3a6df0b18f3b3ab24ae18a8c4626dd47e6bc1281b685de" -dependencies = [ - "crossbeam", - "parking_lot", - "re_tracing", - "web-time", -] - -[[package]] -name = "re_space_view" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18e5a44921520cb1d89df0f0570aaac5a00b481796a1560c422cb10d13d990c7" -dependencies = [ - "egui", - "itertools 0.12.1", - "nohash-hasher", - "once_cell", - "re_entity_db", - "re_log_types", - "re_tracing", - "re_types_core", - "re_viewer_context", - "serde", - "slotmap", - "smallvec", -] - -[[package]] -name = "re_space_view_bar_chart" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4af6ecc16a814d697c147420a801230c5b3d7756d958160db014f57de800fa9d" -dependencies = [ - "egui", - "egui_plot", - "re_data_store", - "re_entity_db", - "re_log", - "re_log_types", - "re_renderer", - "re_space_view", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", -] - -[[package]] -name = "re_space_view_dataframe" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2055d682f829b536bd4c9382dacb80ff957e127153e920f47f3b7b122da069d1" -dependencies = [ - "egui", - "egui_extras", - "itertools 0.12.1", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", -] - -[[package]] -name = "re_space_view_spatial" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01e4d4b56d0a3c8e2d4915146a186376d51271df0926c5e52c433b1458996170" -dependencies = [ - "ahash", - "anyhow", - "bytemuck", - "egui", - "glam", - "itertools 0.12.1", - "macaw", - "nohash-hasher", - "parking_lot", - "rayon", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_error", - "re_format", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_space_view", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", - "serde", - "smallvec", -] - -[[package]] -name = "re_space_view_tensor" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "043699c3f520b7c2c14473b237cedc5fd8102b0770e2b7c8b1024513a31fb179" -dependencies = [ - "ahash", - "anyhow", - "bytemuck", - "egui", - "half", - "ndarray", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_log", - "re_log_types", - "re_renderer", - "re_space_view", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", - "serde", - "thiserror", - "wgpu", -] - -[[package]] -name = "re_space_view_text_document" -version = "0.12.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "50fae917e72baab594f68af014c3d0391c972e8938cabb84958af7960fbb646b" -dependencies = [ - "egui", - "egui_commonmark", - "itertools 0.12.1", - "re_data_store", - "re_log", - "re_query", - "re_renderer", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", -] - -[[package]] -name = "re_space_view_text_log" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b449508c1bf026ea2016fd301dead9b1a761d94c0a358c766034134dd37a9e" -dependencies = [ - "egui", - "egui_extras", - "itertools 0.12.1", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", -] - -[[package]] -name = "re_space_view_time_series" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ea3515ce6bc853bde38ce0caefb4ae6cd2c1c7ce873c14ccbdafb6f8459dcd9" -dependencies = [ - "egui", - "egui_plot", - "itertools 0.12.1", - "re_data_store", - "re_format", - "re_log_types", - "re_query", - "re_renderer", - "re_space_view", - "re_tracing", - "re_types", - "re_ui", - "re_viewer_context", -] - -[[package]] -name = "re_string_interner" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b97ba3f30d93aaad0a6b1a90ce54378895d2c5b5893caffbd368fc3fc7e83db" -dependencies = [ - "ahash", - "nohash-hasher", - "once_cell", - "parking_lot", - "serde", -] - -[[package]] -name = "re_time_panel" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957fd3983716328756f93251ab11611abf8a44f09fc371edde74c071cfb8ae16" -dependencies = [ - "egui", - "itertools 0.12.1", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_format", - "re_log_types", - "re_tracing", - "re_ui", - "re_viewer_context", - "serde", - "vec1", -] - -[[package]] -name = "re_tracing" 
-version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ad438a971b668f3f554df33a843576636ea1ef97e7039f18dd89973c7f97850" -dependencies = [ - "puffin 0.18.1", - "puffin_http", - "re_log", - "rfd", -] - -[[package]] -name = "re_tuid" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb107ddea8a1f00a2f1f7faec06e7d632ebe67d1d91c7c03374dcb1addc741a6" -dependencies = [ - "document-features", - "getrandom", - "once_cell", - "serde", - "web-time", -] - -[[package]] -name = "re_types" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d1cbc4c31cee2ddddf1fff7398a68f816f2db2c5312538203658caa6ca4291" -dependencies = [ - "anyhow", - "array-init", - "arrow2", - "bytemuck", - "document-features", - "ecolor", - "glam", - "half", - "image 0.24.9", - "infer", - "itertools 0.12.1", - "linked-hash-map", - "mime_guess2", - "ndarray", - "once_cell", - "ply-rs", - "rayon", - "re_build_tools", - "re_log", - "re_tracing", - "re_types_builder", - "re_types_core", - "smallvec", - "thiserror", - "uuid", - "zune-core", - "zune-jpeg", -] - -[[package]] -name = "re_types_builder" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62e5a446f5b783e093ea6e84233dd7b06eac16acc40416ca3c08d7ddcd283749" -dependencies = [ - "anyhow", - "arrow2", - "camino", - "clang-format", - "convert_case", - "flatbuffers", - "indent", - "itertools 0.12.1", - "prettyplease", - "proc-macro2", - "quote", - "rayon", - "re_build_tools", - "re_log", - "re_tracing", - "rust-format", - "syn 2.0.66", - "tempfile", - "unindent", - "xshell", -] - -[[package]] -name = "re_types_core" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fde54b50a04466cbfaf5f875fcc3ea0591993424bcb788279bc6e02f7bf109c" -dependencies = [ - "anyhow", - "arrow2", - "backtrace", - "document-features", - "once_cell", - 
"re_error", - "re_string_interner", - "re_tracing", - "re_tuid", - "serde", - "smallvec", - "thiserror", -] - -[[package]] -name = "re_ui" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4aeef520baabfb4506a67980162083da822fe65c3f2a2f41e6e0518ad8cdcf12" -dependencies = [ - "egui", - "egui_commonmark", - "egui_extras", - "parking_lot", - "serde", - "serde_json", - "strum 0.24.1", - "strum_macros 0.24.3", - "sublime_fuzzy", -] - -[[package]] -name = "re_viewer" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf5b76d46e02a2605c9693f813c6f65c7300af97e4596fadaead1d686b3aea7b" -dependencies = [ - "ahash", - "anyhow", - "bytemuck", - "cfg-if", - "eframe", - "egui", - "egui-wgpu", - "egui_plot", - "egui_tiles", - "ehttp", - "image 0.24.9", - "itertools 0.12.1", - "once_cell", - "poll-promise", - "re_analytics", - "re_build_info", - "re_build_tools", - "re_data_source", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_error", - "re_format", - "re_log", - "re_log_encoding", - "re_log_types", - "re_memory", - "re_renderer", - "re_smart_channel", - "re_space_view", - "re_space_view_bar_chart", - "re_space_view_dataframe", - "re_space_view_spatial", - "re_space_view_tensor", - "re_space_view_text_document", - "re_space_view_text_log", - "re_space_view_time_series", - "re_time_panel", - "re_tracing", - "re_types", - "re_types_core", - "re_ui", - "re_viewer_context", - "re_viewport", - "re_ws_comms", - "rfd", - "ron", - "serde", - "serde_json", - "thiserror", - "time", - "wasm-bindgen-futures", - "web-time", - "wgpu", -] - -[[package]] -name = "re_viewer_context" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce26e507584cce91278926d7e0ac74abb814613efd33ae6ce168d25189d410d" -dependencies = [ - "ahash", - "anyhow", - "arboard", - "bit-vec", - "bytemuck", - "egui", - "egui-wgpu", - "egui_tiles", - "glam", - "half", - 
"itertools 0.12.1", - "macaw", - "ndarray", - "nohash-hasher", - "once_cell", - "parking_lot", - "re_data_source", - "re_data_store", - "re_entity_db", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_string_interner", - "re_tracing", - "re_types", - "re_ui", - "serde", - "slotmap", - "smallvec", - "thiserror", - "uuid", - "wgpu", -] - -[[package]] -name = "re_viewport" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a954ab8f2528939e2c1837ce02700286d75aacbd966765fd6b927868f79a9ac0" -dependencies = [ - "ahash", - "arrow2", - "egui", - "egui_tiles", - "glam", - "image 0.24.9", - "itertools 0.12.1", - "nohash-hasher", - "once_cell", - "rayon", - "re_data_store", - "re_data_ui", - "re_entity_db", - "re_log", - "re_log_types", - "re_query", - "re_renderer", - "re_space_view", - "re_space_view_time_series", - "re_tracing", - "re_types", - "re_types_core", - "re_ui", - "re_viewer_context", - "rmp-serde", - "tinyvec", -] - -[[package]] -name = "re_web_viewer_server" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "764f7ded58dd08eb603729d33ec1d9d8e4d48d4712c42407f86a380a9df322bf" -dependencies = [ - "clap", - "document-features", - "futures-util", - "hyper", - "re_analytics", - "re_build_tools", - "re_build_web_viewer", - "re_error", - "re_log", - "thiserror", - "tokio", - "webbrowser", -] - -[[package]] -name = "re_ws_comms" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "679253d4eddde00edd5294d8b1e940762f9a1c66a41e4b43b4c71f55e01fdfd6" -dependencies = [ - "anyhow", - "bincode", - "document-features", - "ewebsock", - "re_format", - "re_log", - "re_log_types", - "re_memory", - "re_tracing", - "thiserror", -] - -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" 
-dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" -dependencies = [ - "bitflags 2.5.0", -] - -[[package]] -name = "redox_users" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" -dependencies = [ - "getrandom", - "libredox 0.1.3", - "thiserror", -] - -[[package]] -name = "regex" -version = "1.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata 0.4.6", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-automata" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-lite" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" - -[[package]] -name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] -name = 
"renderdoc-sys" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" - -[[package]] -name = "rerun" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce557407411076b9dbfb1bf25659aaec60737158ee4305f8bc565fde81b2cdf" -dependencies = [ - "anyhow", - "clap", - "document-features", - "env_logger 0.10.2", - "itertools 0.12.1", - "log", - "puffin 0.18.1", - "rayon", - "re_analytics", - "re_build_info", - "re_build_tools", - "re_crash_handler", - "re_data_source", - "re_entity_db", - "re_format", - "re_log", - "re_log_encoding", - "re_log_types", - "re_memory", - "re_sdk", - "re_sdk_comms", - "re_smart_channel", - "re_tracing", - "re_types", - "re_viewer", - "re_web_viewer_server", - "re_ws_comms", - "tokio", -] - -[[package]] -name = "rfd" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c9e7b57df6e8472152674607f6cc68aa14a748a3157a857a94f516e11aeacc2" -dependencies = [ - "ashpd", - "async-io 1.13.0", - "block", - "dispatch", - "futures-util", - "js-sys", - "log", - "objc", - "objc-foundation", - "objc_id", - "pollster", - "raw-window-handle", - "urlencoding", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "windows-sys 0.48.0", -] - -[[package]] -name = "ring" -version = "0.17.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" -dependencies = [ - "cc", - "cfg-if", - "getrandom", - "libc", - "spin", - "untrusted", - "windows-sys 0.52.0", -] - -[[package]] -name = "rle-decode-fast" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" - -[[package]] -name = "rmp" -version = "0.8.14" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" -dependencies = [ - "byteorder", - "num-traits", - "paste", -] - -[[package]] -name = "rmp-serde" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - -[[package]] -name = "ron" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" -dependencies = [ - "base64 0.21.7", - "bitflags 2.5.0", - "serde", - "serde_derive", -] - -[[package]] -name = "rusqlite" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" -dependencies = [ - "bitflags 2.5.0", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "smallvec", -] - -[[package]] -name = "rust-format" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60e7c00b6c3bf5e38a880eec01d7e829d12ca682079f8238a464def3c4b31627" - -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "0.37.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.8", - "windows-sys 0.48.0", -] - -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags 2.5.0", - "errno", - "libc", - "linux-raw-sys 0.4.14", - "windows-sys 0.52.0", -] - -[[package]] -name = "rustls" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" -dependencies = [ - "log", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-pki-types" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" - -[[package]] -name = "rustls-webpki" -version = "0.102.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - -[[package]] -name = "rustversion" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" - -[[package]] -name = "sage-core" -version = "0.14.7" -source = "git+https://github.com/lazear/sage.git?rev=9e870429889b341c4773df32b65e553283301a93#9e870429889b341c4773df32b65e553283301a93" -dependencies = [ - "dashmap", - "fnv", - "itertools 0.10.5", - "log", - "rayon", - "regex", - "serde", -] - -[[package]] 
-name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" -dependencies = [ - "serde", -] - -[[package]] -name = "seq-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" - -[[package]] -name = "serde" -version = "1.0.202" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_bytes" -version = "0.11.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" -dependencies = [ - "serde", -] - -[[package]] -name = "serde_derive" -version = "1.0.202" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "serde_json" -version = "1.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" 
-dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_repr" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "serde_spanned" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" -dependencies = [ - "serde", -] - -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" -dependencies = [ - "libc", -] - -[[package]] -name = "simd-adler32" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" - -[[package]] -name = "simdutf8" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" - -[[package]] -name = "similar" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640" -dependencies = [ - "bstr", - "unicode-segmentation", -] - -[[package]] -name = 
"similar-asserts" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e041bb827d1bfca18f213411d51b665309f1afb37a04a5d1464530e13779fc0f" -dependencies = [ - "console", - "similar", -] - -[[package]] -name = "skeptic" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16d23b015676c90a0f01c197bfdc786c20342c73a0afdda9025adb0bc42940a8" -dependencies = [ - "bytecount", - "cargo_metadata 0.14.2", - "error-chain", - "glob", - "pulldown-cmark", - "tempfile", - "walkdir", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "slotmap" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" -dependencies = [ - "serde", - "version_check", -] - -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -dependencies = [ - "serde", -] - -[[package]] -name = "smithay-client-toolkit" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870427e30b8f2cbe64bf43ec4b86e88fe39b0a84b3f15efd9c9c2d020bc86eb9" -dependencies = [ - "bitflags 1.3.2", - "calloop", - "dlib", - "lazy_static", - "log", - "memmap2 0.5.10", - "nix 0.24.3", - "pkg-config", - "wayland-client", - "wayland-cursor", - "wayland-protocols", -] - -[[package]] -name = "smithay-clipboard" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a345c870a1fae0b1b779085e81b51e614767c239e93503588e54c5b17f4b0e8" -dependencies = [ - "smithay-client-toolkit", - "wayland-client", -] - -[[package]] -name = "snap" 
-version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "socket2" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "socket2" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - -[[package]] -name = "spirv" -version = "0.2.0+1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "246bfa38fe3db3f1dfc8ca5a2cdeb7348c78be2112740cc0ec8ef18b6d94f830" -dependencies = [ - "bitflags 1.3.2", - "num-traits", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" -dependencies = [ - "strum_macros 0.24.3", -] - 
-[[package]] -name = "strum" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.66", -] - -[[package]] -name = "sublime_fuzzy" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7986063f7c0ab374407e586d7048a3d5aac94f103f751088bf398e07cd5400" - -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "sysinfo" -version = "0.30.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae" -dependencies = [ - "cfg-if", - "core-foundation-sys", - "libc", - "ntapi", - 
"once_cell", - "windows 0.52.0", -] - -[[package]] -name = "tempfile" -version = "3.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" -dependencies = [ - "cfg-if", - "fastrand 2.1.0", - "rustix 0.38.34", - "windows-sys 0.52.0", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "thiserror" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float 2.10.1", -] - -[[package]] -name = "tiff" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" -dependencies = [ - "flate2", - "jpeg-decoder", - "weezl", -] - -[[package]] -name = "time" -version = "0.3.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" -dependencies = [ - "deranged", - "itoa", - "js-sys", - "libc", - "num-conv", - "num_threads", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = 
"time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "time-macros" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "timsrust" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9301709a549fabb2d79f564c528b0af5ca0002bdf0055341cfcc07950a44290b" -dependencies = [ - "bytemuck", - "byteorder", - "linreg", - "memmap2 0.9.4", - "parquet", - "rayon", - "rusqlite", - "thiserror", - "zstd", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tobj" -version = "4.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3bd4ba05f29e4c65b6c0c11a58b6465ffa820bac890d76ad407b4e81d8372e8" -dependencies = [ - "ahash", -] - -[[package]] -name = "tokio" -version = "1.37.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "pin-project-lite", - "socket2 0.5.7", - 
"tokio-macros", - "windows-sys 0.48.0", -] - -[[package]] -name = "tokio-macros" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "tokio-util" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "toml" -version = "0.8.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit 0.22.13", -] - -[[package]] -name = "toml_datetime" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" -dependencies = [ - "serde", -] - -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap 2.2.6", - "toml_datetime", - "winnow 0.5.40", -] - -[[package]] -name = "toml_edit" -version = "0.22.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" -dependencies = [ - "indexmap 2.2.6", - "serde", - "serde_spanned", - "toml_datetime", - "winnow 0.6.8", -] - -[[package]] -name = "tower-service" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = 
"6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] -name = "tracing" -version = "0.1.40" +name = "memmap2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" dependencies = [ - "log", - "pin-project-lite", - "tracing-attributes", - "tracing-core", + "libc", ] [[package]] -name = "tracing-attributes" -version = "0.1.27" +name = "miniz_oxide" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", + "adler", ] [[package]] -name = "tracing-core" -version = "0.1.32" +name = "num" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ - "once_cell", + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", ] [[package]] -name = "try-lock" -version = "0.2.5" +name = "num-bigint" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] [[package]] -name = "ttf-parser" -version = "0.21.1" +name = "num-complex" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c591d83f69777866b9126b24c6dd9a18351f177e49d625920d19f989fd31cf8" +checksum = 
"73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] [[package]] -name = "tungstenite" -version = "0.21.0" +name = "num-integer" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "byteorder", - "bytes", - "data-encoding", - "http 1.1.0", - "httparse", - "log", - "rand", - "sha1", - "thiserror", - "url", - "utf-8", + "num-traits", ] [[package]] -name = "twox-hash" -version = "1.6.3" +name = "num-iter" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" dependencies = [ - "cfg-if", - "static_assertions", + "autocfg", + "num-integer", + "num-traits", ] [[package]] -name = "type-map" -version = "0.5.0" +name = "num-rational" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb68604048ff8fa93347f02441e4487594adc20bb8a084f9e564d2b827a0a9f" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ - "rustc-hash", + "num-bigint", + "num-integer", + "num-traits", ] [[package]] -name = "typed-builder" -version = "0.16.2" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34085c17941e36627a879208083e25d357243812c30e7d7387c3b954f30ade16" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "typed-builder-macro", + "autocfg", + "libm", ] [[package]] -name = "typed-builder-macro" -version = "0.16.2" +name = "number_prefix" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] -name = "typenum" -version = "1.17.0" +name = "once_cell" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] -name = "uds_windows" -version = "1.1.0" +name = "ordered-float" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89daebc3e6fd160ac4aa9fc8b3bf71e1f74fbf92367ae71fb83a037e8bf164b9" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ - "memoffset 0.9.1", - "tempfile", - "winapi", + "num-traits", ] [[package]] -name = "unicase" -version = "2.7.0" +name = "parking_lot_core" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ - "version_check", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] [[package]] -name = "unicode-bidi" -version = "0.3.15" +name = "parquet" +version = "42.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "baab9c36b1c8300b81b4d577d306a0a733f9d34021363098d3548e37757ed6c8" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "hashbrown", + "lz4", + "num", + "num-bigint", + "paste", + "seq-macro", + "snap", + "thrift", + "twox-hash", + 
"zstd", +] [[package]] -name = "unicode-ident" -version = "1.0.12" +name = "paste" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] -name = "unicode-normalization" -version = "0.1.23" +name = "pkg-config" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] -name = "unicode-segmentation" -version = "1.11.0" +name = "portable-atomic" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" [[package]] -name = "unicode-width" -version = "0.1.12" +name = "ppv-lite86" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] -name = "unicode-xid" -version = "0.2.4" +name = "pretty_env_logger" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +dependencies = [ + "env_logger", + "log", +] [[package]] -name = "unindent" -version = "0.1.11" +name = "proc-macro2" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" +checksum = 
"0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] [[package]] -name = "untrusted" -version = "0.9.0" +name = "quick-error" +version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] -name = "ureq" -version = "2.9.7" +name = "quote" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ - "base64 0.22.1", - "flate2", - "log", - "once_cell", - "rustls", - "rustls-pki-types", - "rustls-webpki", - "url", - "webpki-roots", + "proc-macro2", ] [[package]] -name = "url" -version = "2.5.0" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "urlencoding" -version = "2.1.3" +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] [[package]] -name = "utf-8" -version = "0.7.6" +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] [[package]] -name = "utf8parse" -version = "0.2.1" +name = "rayon" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] [[package]] -name = "uuid" -version = "1.8.0" +name = "rayon-core" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ - "getrandom", - "serde", - "wasm-bindgen", + "crossbeam-deque", + "crossbeam-utils", ] [[package]] -name = "vcpkg" -version = "0.2.15" +name = "redox_syscall" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] [[package]] -name = "vec1" -version = "1.12.0" +name = "regex" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb60dcfffc189bfd4e2a81333c268619fee9db53da71bce2bcbd8e129c56936" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] [[package]] -name = "vec_map" -version = "0.8.2" +name = "regex-automata" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + 
"regex-syntax", +] [[package]] -name = "version_check" -version = "0.9.4" +name = "regex-syntax" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] -name = "waker-fn" -version = "1.2.0" +name = "rusqlite" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" +checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +dependencies = [ + "bitflags 2.5.0", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] [[package]] -name = "walkdir" -version = "2.5.0" +name = "rustc_version" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "same-file", - "winapi-util", + "semver", ] [[package]] -name = "walrus" -version = "0.20.3" +name = "ryu" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c03529cd0c4400a2449f640d2f27cd1b48c3065226d15e26d98e4429ab0adb7" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "sage-core" +version = "0.14.7" +source = "git+https://github.com/lazear/sage.git?rev=9e870429889b341c4773df32b65e553283301a93#9e870429889b341c4773df32b65e553283301a93" dependencies = [ - "anyhow", - "gimli 0.26.2", - "id-arena", - "leb128", + "dashmap", + "fnv", + "itertools", "log", - "walrus-macro", - "wasm-encoder", - "wasmparser", + "rayon", + "regex", + "serde", ] [[package]] -name = "walrus-macro" -version = "0.19.0" +name = "scopeguard" +version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e5bd22c71e77d60140b0bd5be56155a37e5bd14e24f5f87298040d0cc40d7" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "want" -version = "0.3.1" +name = "semver" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +name = "seq-macro" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] -name = "wasm-bindgen" -version = "0.2.92" +name = "serde" +version = "1.0.202" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" dependencies = [ - "cfg-if", - "wasm-bindgen-macro", + "serde_derive", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.92" +name = "serde_derive" +version = "1.0.202" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" dependencies = [ - "bumpalo", - "log", - "once_cell", "proc-macro2", "quote", "syn 2.0.66", - "wasm-bindgen-shared", ] [[package]] -name = "wasm-bindgen-cli-support" -version = "0.2.92" +name = "serde_json" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ca821da8c1ae6c87c5e94493939a206daa8587caff227c6032e0061a3d80817f" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ - "anyhow", - "base64 0.21.7", - "log", - "rustc-demangle", - "serde_json", - "tempfile", - "unicode-ident", - "walrus", - "wasm-bindgen-externref-xform", - "wasm-bindgen-multi-value-xform", - "wasm-bindgen-shared", - "wasm-bindgen-threads-xform", - "wasm-bindgen-wasm-conventions", - "wasm-bindgen-wasm-interpreter", + "itoa", + "ryu", + "serde", ] [[package]] -name = "wasm-bindgen-externref-xform" -version = "0.2.92" +name = "serde_spanned" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "102582726b35a30d53157fbf8de3d0f0fed4c40c0c7951d69a034e9ef01da725" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" dependencies = [ - "anyhow", - "walrus", + "serde", ] [[package]] -name = "wasm-bindgen-futures" -version = "0.4.42" +name = "smallvec" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] -name = "wasm-bindgen-macro" -version = "0.2.92" +name = "snap" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.92" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" -dependencies = [ - "proc-macro2", - "quote", - "syn 
2.0.66", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "wasm-bindgen-multi-value-xform" -version = "0.2.92" +name = "strsim" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3498e4799f43523d780ceff498f04d882a8dbc9719c28020034822e5952f32a4" -dependencies = [ - "anyhow", - "walrus", -] +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] -name = "wasm-bindgen-shared" -version = "0.2.92" +name = "syn" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] [[package]] -name = "wasm-bindgen-threads-xform" -version = "0.2.92" +name = "syn" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5add359b7f7d09a55299a9d29be54414264f2b8cf84f8c8fda5be9269b5dd9" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ - "anyhow", - "walrus", - "wasm-bindgen-wasm-conventions", + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] -name = "wasm-bindgen-wasm-conventions" -version = "0.2.92" +name = "termcolor" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c04e3607b810e76768260db3a5f2e8beb477cb089ef8726da85c8eb9bd3b575" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ - "anyhow", - "walrus", + "winapi-util", ] [[package]] -name = "wasm-bindgen-wasm-interpreter" -version = "0.2.92" +name = "thiserror" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9ea966593c8243a33eb4d643254eb97a69de04e89462f46cf6b4f506aae89b3a" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" dependencies = [ - "anyhow", - "log", - "walrus", - "wasm-bindgen-wasm-conventions", + "thiserror-impl", ] [[package]] -name = "wasm-encoder" -version = "0.29.0" +name = "thiserror-impl" +version = "1.0.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18c41dbd92eaebf3612a39be316540b8377c871cb9bde6b064af962984912881" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ - "leb128", + "proc-macro2", + "quote", + "syn 2.0.66", ] [[package]] -name = "wasm-streams" -version = "0.3.0" +name = "thrift" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", + "byteorder", + "integer-encoding", + "ordered-float", ] [[package]] -name = "wasmparser" -version = "0.80.2" +name = "timsrust" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "449167e2832691a1bff24cde28d2804e90e09586a448c8e76984792c44334a6b" +checksum = "9301709a549fabb2d79f564c528b0af5ca0002bdf0055341cfcc07950a44290b" +dependencies = [ + "bytemuck", + "byteorder", + "linreg", + "memmap2", + "parquet", + "rayon", + "rusqlite", + "thiserror", + "zstd", +] [[package]] -name = "wayland-client" -version = "0.29.5" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f3b068c05a039c9f755f881dc50f01732214f5685e379829759088967c46715" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" dependencies = [ - "bitflags 1.3.2", - "downcast-rs", - "libc", - "nix 0.24.3", - "scoped-tls", - 
"wayland-commons", - "wayland-scanner", - "wayland-sys", + "crunchy", ] [[package]] -name = "wayland-commons" -version = "0.29.5" +name = "toml" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8691f134d584a33a6606d9d717b95c4fa20065605f798a3f350d78dced02a902" +checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" dependencies = [ - "nix 0.24.3", - "once_cell", - "smallvec", - "wayland-sys", + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", ] [[package]] -name = "wayland-cursor" -version = "0.29.5" +name = "toml_datetime" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6865c6b66f13d6257bef1cd40cbfe8ef2f150fb8ebbdb1e8e873455931377661" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" dependencies = [ - "nix 0.24.3", - "wayland-client", - "xcursor", + "serde", ] [[package]] -name = "wayland-protocols" -version = "0.29.5" +name = "toml_edit" +version = "0.22.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b950621f9354b322ee817a23474e479b34be96c2e909c14f7bc0100e9a970bc6" +checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" dependencies = [ - "bitflags 1.3.2", - "wayland-client", - "wayland-commons", - "wayland-scanner", + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", ] [[package]] -name = "wayland-scanner" -version = "0.29.5" +name = "twox-hash" +version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4303d8fa22ab852f789e75a967f0a2cdc430a607751c0499bada3e451cbd53" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "proc-macro2", - "quote", - "xml-rs", + "cfg-if", + "static_assertions", ] [[package]] -name = "wayland-sys" -version = "0.29.5" +name = "unicode-ident" +version = "1.0.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "be12ce1a3c39ec7dba25594b97b42cb3195d54953ddb9d3d95a7c3902bc6e9d4" -dependencies = [ - "dlib", - "lazy_static", - "pkg-config", -] +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] -name = "web-sys" -version = "0.3.64" +name = "unicode-width" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" -dependencies = [ - "js-sys", - "wasm-bindgen", -] +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" [[package]] -name = "web-time" -version = "0.2.4" +name = "utf8parse" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa30049b1c872b72c89866d458eae9f20380ab280ffd1b1e18df2d3e2d98cfe0" -dependencies = [ - "js-sys", - "wasm-bindgen", -] +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] -name = "webbrowser" -version = "0.8.15" +name = "vcpkg" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db67ae75a9405634f5882791678772c94ff5f16a66535aae186e26aa0841fc8b" -dependencies = [ - "core-foundation", - "home", - "jni", - "log", - "ndk-context", - "objc", - "raw-window-handle", - "url", - "web-sys", -] +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] -name = "webpki-roots" -version = "0.26.1" +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" -dependencies = [ - "rustls-pki-types", -] +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "weezl" -version = "0.1.8" +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] -name = "wgpu" -version = "0.18.0" +name = "wasm-bindgen" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e7d227c9f961f2061c26f4cb0fbd4df0ef37e056edd0931783599d6c94ef24" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ - "arrayvec", "cfg-if", - "flume", - "js-sys", - "log", - "naga", - "parking_lot", - "profiling", - "raw-window-handle", - "smallvec", - "static_assertions", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "wgpu-core", - "wgpu-hal", - "wgpu-types", + "wasm-bindgen-macro", ] [[package]] -name = "wgpu-core" -version = "0.18.1" +name = "wasm-bindgen-backend" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef91c1d62d1e9e81c79e600131a258edf75c9531cbdbde09c44a011a47312726" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ - "arrayvec", - "bit-vec", - "bitflags 2.5.0", - "codespan-reporting", + "bumpalo", "log", - "naga", - "parking_lot", - "profiling", - "raw-window-handle", - "rustc-hash", - "smallvec", - "thiserror", - "web-sys", - "wgpu-hal", - "wgpu-types", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-shared", ] [[package]] -name = "wgpu-hal" -version = "0.18.1" +name = "wasm-bindgen-macro" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84ecc802da3eb67b4cf3dd9ea6fe45bbb47ef13e6c49c5c3240868a9cc6cdd9" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ - "android_system_properties", - "arrayvec", - "ash", - "bit-set", - "bitflags 2.5.0", - "block", - "core-graphics-types", - "d3d12", - "glow 0.13.1", - "glutin_wgl_sys", - "gpu-alloc", - "gpu-allocator", - 
"gpu-descriptor", - "hassle-rs", - "js-sys", - "khronos-egl", - "libc", - "libloading 0.8.3", - "log", - "metal", - "naga", - "objc", - "once_cell", - "parking_lot", - "profiling", - "range-alloc", - "raw-window-handle", - "renderdoc-sys", - "rustc-hash", - "smallvec", - "thiserror", - "wasm-bindgen", - "web-sys", - "wgpu-types", - "winapi", + "quote", + "wasm-bindgen-macro-support", ] [[package]] -name = "wgpu-types" -version = "0.18.0" +name = "wasm-bindgen-macro-support" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d5ed5f0edf0de351fe311c53304986315ce866f394a2e6df0c4b3c70774bcdd" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ - "bitflags 2.5.0", - "js-sys", - "web-sys", + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", ] [[package]] -name = "widestring" -version = "1.1.0" +name = "wasm-bindgen-shared" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "winapi" @@ -7026,7 +1557,7 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -7035,93 +1566,13 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-targets 0.48.5", -] - -[[package]] -name = "windows" 
-version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca229916c5ee38c2f2bc1e9d8f04df975b4bd93f9955dc69fabb5d91270045c9" -dependencies = [ - "windows-core 0.51.1", - "windows-targets 0.48.5", -] - -[[package]] -name = "windows" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" -dependencies = [ - "windows-core 0.52.0", - "windows-targets 0.52.5", -] - -[[package]] -name = "windows-core" -version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-core" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", -] - -[[package]] -name = "windows-implement" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e2ee588991b9e7e6c8338edf3333fbe4da35dc72092643958ebb43f0ab2c49c" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "windows-interface" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6fb8df20c9bcaa8ad6ab513f7b40104840c8867d5751126e4df3b08388d0cc7" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", + "windows-targets", ] [[package]] @@ -7130,37 +1581,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", -] - -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -7169,64 +1590,28 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" 
-version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.5" @@ -7239,121 +1624,30 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - -[[package]] -name = "windows_x86_64_msvc" 
-version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" -[[package]] -name = "winit" -version = "0.28.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9596d90b45384f5281384ab204224876e8e8bf7d58366d9b795ad99aa9894b94" -dependencies = [ - "android-activity", - "bitflags 1.3.2", - "cfg_aliases 0.1.1", - "core-foundation", - "core-graphics 0.22.3", - "dispatch", - "instant", - "libc", - "log", - "mio", - "ndk", - "objc2 0.3.0-beta.3.patch-leaks.3", - "once_cell", - "orbclient", - "percent-encoding", - "raw-window-handle", - "redox_syscall 0.3.5", - "smithay-client-toolkit", - "wasm-bindgen", - "wayland-client", - "wayland-commons", - "wayland-protocols", - "wayland-scanner", - "web-sys", - "windows-sys 0.45.0", - "x11-dl", -] - -[[package]] -name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - [[package]] name = "winnow" version = "0.6.8" @@ -7363,137 +1657,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "x11-dl" -version = "2.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38735924fedd5314a6e548792904ed8c6de6636285cb9fec04d5b1db85c1516f" -dependencies = [ - "libc", - "once_cell", - "pkg-config", -] - -[[package]] -name = "x11rb" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" -dependencies = [ - "gethostname", - "rustix 0.38.34", - "x11rb-protocol", -] - -[[package]] -name = "x11rb-protocol" -version = 
"0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" - -[[package]] -name = "xcursor" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0ccd7b4a5345edfcd0c3535718a4e9ff7798ffc536bb5b5a0e26ff84732911" - -[[package]] -name = "xdg-home" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e5a325c3cb8398ad6cf859c1135b25dd29e186679cf2da7581d9679f63b38e" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "xml-rs" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791978798f0597cfc70478424c2b4fdc2b7a8024aaff78497ef00f24ef674193" - -[[package]] -name = "xshell" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db0ab86eae739efd1b054a8d3d16041914030ac4e01cd1dca0cf252fd8b6437" -dependencies = [ - "xshell-macros", -] - -[[package]] -name = "xshell-macros" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d422e8e38ec76e2f06ee439ccc765e9c6a9638b9e7c9f2e8255e4d41e8bd852" - -[[package]] -name = "zbus" -version = "3.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "675d170b632a6ad49804c8cf2105d7c31eddd3312555cffd4b740e08e97c25e6" -dependencies = [ - "async-broadcast", - "async-executor", - "async-fs 1.6.0", - "async-io 1.13.0", - "async-lock 2.8.0", - "async-process", - "async-recursion", - "async-task", - "async-trait", - "blocking", - "byteorder", - "derivative", - "enumflags2", - "event-listener 2.5.3", - "futures-core", - "futures-sink", - "futures-util", - "hex", - "nix 0.26.4", - "once_cell", - "ordered-stream", - "rand", - "serde", - "serde_repr", - "sha1", - "static_assertions", - "tracing", - "uds_windows", - "winapi", - "xdg-home", - "zbus_macros", - "zbus_names", - "zvariant", -] - 
-[[package]] -name = "zbus_macros" -version = "3.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7131497b0f887e8061b430c530240063d33bf9455fa34438f388a245da69e0a5" -dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "regex", - "syn 1.0.109", - "zvariant_utils", -] - -[[package]] -name = "zbus_names" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "437d738d3750bed6ca9b8d423ccc7a8eb284f6b1d6d4e225a0e4e6258d864c8d" -dependencies = [ - "serde", - "static_assertions", - "zvariant", -] - [[package]] name = "zerocopy" version = "0.7.34" @@ -7514,12 +1677,6 @@ dependencies = [ "syn 2.0.66", ] -[[package]] -name = "zeroize" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" - [[package]] name = "zstd" version = "0.12.4" @@ -7548,57 +1705,3 @@ dependencies = [ "cc", "pkg-config", ] - -[[package]] -name = "zune-core" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" - -[[package]] -name = "zune-jpeg" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448" -dependencies = [ - "zune-core", -] - -[[package]] -name = "zvariant" -version = "3.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eef2be88ba09b358d3b58aca6e41cd853631d44787f319a1383ca83424fb2db" -dependencies = [ - "byteorder", - "enumflags2", - "libc", - "serde", - "static_assertions", - "url", - "zvariant_derive", -] - -[[package]] -name = "zvariant_derive" -version = "3.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c24dc0bed72f5f90d1f8bb5b07228cbf63b3c6e9f82d82559d4bae666e7ed9" -dependencies = [ - 
"proc-macro-crate", - "proc-macro2", - "quote", - "syn 1.0.109", - "zvariant_utils", -] - -[[package]] -name = "zvariant_utils" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7234f0d811589db492d16893e3f21e8e2fd282e6d01b0cddee310322062cc200" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] diff --git a/Cargo.toml b/Cargo.toml index 09556be..8bdcb58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ionmesh" -version = "0.1.0" +version = "0.2.0" edition = "2021" license = "Apache-2.0" @@ -13,28 +13,22 @@ rusqlite = { version = "0.29.0", features = ["bundled"] } # Serialization serde = { version = "1.0.193", features = ["derive"] } serde_json = "1.0.108" -apache-avro = "0.16.0" rayon = "1.8.0" indicatif = {version = "*", features = ["rayon"]} - -# for viz feature -rerun = "0.12.0" -rand = "0.8.5" - log = "0.4" pretty_env_logger = "0.4" num = "0.4.1" num-traits = "0.2.18" -clap = "4.4.17" +clap = {version = "4.4.17", features = ["derive"]} csv = "1.3.0" +rand = "0.8.5" + # Sage sage-core = { git = "https://github.com/lazear/sage.git", rev = "9e870429889b341c4773df32b65e553283301a93" } toml = "0.8.8" [features] -viz = [] -f64 = [] par_dataprep = [] # [profile.release] diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index feab118..ab8b407 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -13,7 +13,6 @@ use crate::utils; /// /// 1. Intensity usage. 
/// -use crate::mod_types::Float; use crate::ms::frames; use crate::space::space_generics::{HasIntensity, IndexedPoints, NDPoint}; use indicatif::ProgressIterator; @@ -168,7 +167,7 @@ fn _dbscan< filter_fun: Option, converter: C, progress: bool, - max_extension_distances: &[Float;N], + max_extension_distances: &[f32;N], ) -> (u64, Vec>) { let mut initial_candidates_counts = utils::RollingSDCalculator::default(); let mut final_candidates_counts = utils::RollingSDCalculator::default(); @@ -587,7 +586,7 @@ fn reassign_centroid< elements: &Vec, def_aggregator: F, log_level: utils::LogLevel, - expansion_factors: &[Float;N], + expansion_factors: &[f32;N], ) -> Vec { let mut timer = utils::ContextTimer::new("reassign_centroid", true, log_level); let mut out = Vec::with_capacity(centroids.len()); @@ -645,7 +644,7 @@ pub fn dbscan_generic< extra_filter_fun: Option<&FF>, log_level: Option, keep_unclustered: bool, - max_extension_distances: &[Float;N], + max_extension_distances: &[f32;N], back_converter: Option, ) -> Vec { let show_progress = log_level.is_some(); @@ -741,8 +740,8 @@ impl NDPointConverter for DenseFrameConverter { fn convert(&self, elem: &TimsPeak) -> NDPoint<2> { NDPoint { values: [ - (elem.mz / self.mz_scaling) as Float, - (elem.mobility / self.ims_scaling) as Float, + (elem.mz / self.mz_scaling) as f32, + (elem.mobility / self.ims_scaling) as f32, ], } } @@ -750,7 +749,7 @@ impl NDPointConverter for DenseFrameConverter { type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; // bool> -pub fn dbscan_denseframes( +pub fn dbscan_denseframe( mut denseframe: frames::DenseFrame, mz_scaling: f64, max_mz_extension: f64, @@ -798,7 +797,7 @@ pub fn dbscan_denseframes( None::<&FFTimsPeak>, None, true, - &[max_mz_extension as Float, max_ims_extension as Float], + &[max_mz_extension as f32, max_ims_extension as f32], None::, ); diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index a3c6b6c..4a6cae1 100644 --- a/src/aggregation/mod.rs +++ 
b/src/aggregation/mod.rs @@ -1,5 +1,4 @@ pub mod dbscan; pub mod ms_denoise; -pub mod trace_combination; pub mod tracing; -pub mod chromatograms; \ No newline at end of file +pub mod chromatograms; diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index be71368..739a360 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -7,7 +7,6 @@ use crate::ms::frames::DenseFrameWindow; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; use crate::utils; -use crate::visualization::RerunPlottable; use indicatif::ParallelProgressIterator; use log::{info, trace, warn}; @@ -117,7 +116,7 @@ fn _denoise_denseframe( let index = frame.index; // this is the line that matters // TODO move the scalings to parameters - let denoised_frame = dbscan::dbscan_denseframes( + let denoised_frame = dbscan::dbscan_denseframe( frame, mz_scaling, max_mz_extension, @@ -185,78 +184,35 @@ fn _denoise_dia_frame( trait Denoiser<'a, T, W, X, Z> where - T: RerunPlottable + std::marker::Send, - W: Clone + RerunPlottable + std::marker::Send, + T: std::marker::Send, + W: Clone + std::marker::Send, X: Clone, Z: Clone, Vec: IntoParallelIterator, { - fn denoise(&self, frame: T) -> W; - // TODO maybe add a par_denoise_slice method - // with the implementation ... 
+ fn denoise(&self, elem: T) -> W { + unimplemented!() + } + fn par_denoise_slice( &self, - mut frames: Vec, - record_stream: &mut Option, - plotting_extras: (X, Z), + elems: Vec, ) -> Vec where Self: Sync, { - info!("Denoising {} frames", frames.len()); + info!("Denoising {} frames", elems.len()); // randomly viz 1/200 frames // Selecting a slice of 1/200 frames - if let Some(stream) = record_stream.as_mut() { - warn!("Viz is enabled, randomly subsetting 1/200 frames"); - let len_keep = frames.len() / 200; - let start = rand::random::() % (frames.len() - len_keep); - // let keep = [false, true, true, false, true]; - let mut keep = vec![false; start] - .into_iter() - .chain(vec![true; len_keep]) - .collect::>(); - - keep.append(&mut vec![false; frames.len() - start - len_keep]); - let mut iter_keep = keep.iter(); - - frames.retain(|_| *iter_keep.next().unwrap()); - - for (i, frame) in frames.iter().enumerate() { - info!("Logging frame {}", i); - frame - .plot( - stream, - String::from("points/Original"), - None, - plotting_extras.0.clone(), - ) - .unwrap(); - } - } - - let progbar = indicatif::ProgressBar::new(frames.len() as u64); - let denoised_frames: Vec = frames + let progbar = indicatif::ProgressBar::new(elems.len() as u64); + let denoised_elements: Vec = elems .into_par_iter() .progress_with(progbar) .map(|x| self.denoise(x)) .collect::>(); - if let Some(stream) = record_stream.as_mut() { - for (i, frame) in denoised_frames.iter().enumerate() { - trace!("Logging frame {}", i); - frame - .plot( - stream, - String::from("points/denoised"), - None, - plotting_extras.1.clone(), - ) - .unwrap(); - } - } - - denoised_frames + denoised_elements } } @@ -327,7 +283,6 @@ pub fn read_all_ms1_denoising( max_mz_extension: f64, ims_scaling: f32, max_ims_extension: f32, - record_stream: &mut Option, ) -> Vec { let reader = timsrust::FileReader::new(path).unwrap(); @@ -357,20 +312,17 @@ pub fn read_all_ms1_denoising( mz_converter, }; - let converters = (ims_converter, 
mz_converter); let mut timer = utils::ContextTimer::new("Denoising all MS1 frames", true, utils::LogLevel::INFO); - let out = ms1_denoiser.par_denoise_slice(frames, record_stream, (converters, None)); + let out = ms1_denoiser.par_denoise_slice(frames); timer.stop(true); out } // This could probably be a macro ... -// Maybe I should just pass the config ... instead of the elements pub fn read_all_dia_denoising( path: String, config: DenoiseConfig, - record_stream: &mut Option, ) -> (Vec, DIAFrameInfo) { let mut timer = utils::ContextTimer::new("Reading all DIA frames", true, utils::LogLevel::INFO); let reader = timsrust::FileReader::new(path.clone()).unwrap(); @@ -398,11 +350,9 @@ pub fn read_all_dia_denoising( ims_converter, mz_converter, }; - let converters = (ims_converter, mz_converter); - let mut timer = utils::ContextTimer::new("Denoising all MS2 frames", true, utils::LogLevel::INFO); - let split_frames = denoiser.par_denoise_slice(frames, record_stream, (converters, None)); + let split_frames = denoiser.par_denoise_slice(frames); let out: Vec = split_frames.into_iter().flatten().collect(); timer.stop(true); diff --git a/src/aggregation/trace_combination.rs b/src/aggregation/trace_combination.rs deleted file mode 100644 index 98f652a..0000000 --- a/src/aggregation/trace_combination.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::mod_types::Float; - -/// This is an attempt to use dbscan ... again to cluster the traces into pseudo-spectra (or peptides) -/// -/// The idea is to first calculate the distances using a sliding window of the traces. -/// - The first distance will just be a generalized iou of the traces. -/// Once calculated it will implement an indexed space interface (that allows query) -/// - -/// This is an index that represent a sparse matrix of similarities between traces. -/// -/// The main idea is that for a number of points N, the `similarities` Vec has length N. 
-/// Each element of `similarities` is a Vec of tuples of (index, similarity) where index -/// is the index of the other trace and the similarity is the similarity between the two traces. -/// -/// Therefore, the entry for similarities[i], where there is an entry (w, s) shold also have -/// an entry in similarities[w] of (i, s) -struct TraceSimilarityIndex { - similarities: Vec>>, -} diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 14ac700..fff6214 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,10 +1,8 @@ use crate::aggregation::dbscan::{dbscan_generic, ClusterAggregator}; -use crate::mod_types::Float; use crate::ms::frames::{DenseFrame, DenseFrameWindow, TimsPeak}; use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike}; use crate::utils; use crate::utils::RollingSDCalculator; -use crate::visualization::RerunPlottable; use crate::space::space_generics::NDBoundary; use crate::aggregation::chromatograms::{BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS}; @@ -205,7 +203,6 @@ pub fn combine_traces( denseframe_windows: Vec, config: TracingConfig, rt_binsize: f32, - record_stream: &mut Option, ) -> Vec { // mz_scaling: f64, // rt_scaling: f64, @@ -271,81 +268,9 @@ pub fn combine_traces( info!("Total Combined traces: {}", out.len()); timer.stop(true); - if let Some(stream) = record_stream.as_mut() { - let _ = out.plot(stream, String::from("points/combined"), None, None); - } - out } -impl RerunPlottable> for Vec { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - required_extras: Option, - ) -> Result<(), Box> { - // Sort by retention time and make groups of 1s - let mut outs = Vec::new(); - let mut sorted_traces = (*self).clone(); - sorted_traces.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); - - let mut groups: Vec> = Vec::new(); - - let mut group: Vec = Vec::new(); - let mut last_second = 
sorted_traces[0].rt as u32; - for trace in sorted_traces { - let curr_second = trace.rt as u32; - if curr_second != last_second { - groups.push(group.clone()); - group = Vec::new(); - } - last_second = curr_second; - group.push(trace); - } - - // For each group - // Plot the group - for group in groups { - let mut peaks = Vec::new(); - for trace in group { - peaks.push(TimsPeak { - mz: trace.mz, - intensity: trace.intensity.try_into().unwrap_or(u32::MAX), - mobility: trace.mobility, - npeaks: trace.num_agg.try_into().unwrap_or(u32::MAX), - }) - } - - // Pack them into a denseframe - let df = DenseFrame { - raw_peaks: peaks, - rt: last_second as f64, - index: 10 * last_second as usize, // I just need and index and back-calculating it is not worth my time. - frame_type: timsrust::FrameType::Unknown, - sorted: None, - }; - - // Plot the denseframe - let out = df.plot( - rec, - entry_path.clone(), - log_time_in_seconds, - required_extras, - ); - if out.is_err() { - error!("Error plotting pseudo-denseframe: {:?}", out); - } else { - info!("Plotted pseudo-denseframe"); - } - outs.push(out); - } - - Ok(()) - } -} - #[derive(Debug, Clone)] struct TraceAggregator { @@ -449,9 +374,9 @@ impl NDPointConverter for TimeTimsPeakConverter { fn convert(&self, elem: &TimeTimsPeak) -> NDPoint<3> { NDPoint { values: [ - (elem.mz / self.mz_scaling) as Float, - (elem.rt as f64 / self.rt_scaling) as Float, - (elem.ims as f64 / self.ims_scaling) as Float, + (elem.mz / self.mz_scaling) as f32, + (elem.rt as f64 / self.rt_scaling) as f32, + (elem.ims as f64 / self.ims_scaling) as f32, ], } } @@ -667,9 +592,9 @@ impl NDPointConverter for BaseTraceConverter { let quad_center = (elem.quad_low + elem.quad_high) / 2.; NDPoint { values: [ - (elem.rt as f64 / self.rt_scaling) as Float, - (elem.mobility as f64 / self.ims_scaling) as Float, - (quad_center as f64 / self.quad_scaling) as Float, + (elem.rt as f64 / self.rt_scaling) as f32, + (elem.mobility as f64 / self.ims_scaling) as f32, + 
(quad_center as f64 / self.quad_scaling) as f32, ], } } @@ -713,9 +638,9 @@ impl NDPointConverter for PseudoScanBackConverter { let quad_mid = (elem.quad_low + elem.quad_high) / 2.; NDPoint { values: [ - (elem.rt as f64 / self.rt_scaling) as Float, - (elem.ims as f64 / self.ims_scaling) as Float, - (quad_mid as f64 / self.quad_scaling) as Float, + (elem.rt as f64 / self.rt_scaling) as f32, + (elem.ims as f64 / self.ims_scaling) as f32, + (quad_mid as f64 / self.quad_scaling) as f32, ], } } @@ -755,7 +680,6 @@ impl Default for PseudoscanGenerationConfig { pub fn combine_pseudospectra( traces: Vec, config: PseudoscanGenerationConfig, - record_stream: &mut Option, ) -> Vec { let mut timer = utils::ContextTimer::new("Combining pseudospectra", true, utils::LogLevel::INFO); @@ -791,10 +715,10 @@ pub fn combine_pseudospectra( ims_scaling: config.ims_scaling.into(), quad_scaling: config.quad_scaling.into(), }; - let max_extension_distances: [Float; 3] = [ - config.max_rt_expansion_ratio as Float, - config.max_ims_expansion_ratio as Float, - config.max_quad_expansion_ratio as Float, + let max_extension_distances: [f32; 3] = [ + config.max_rt_expansion_ratio as f32, + config.max_ims_expansion_ratio as f32, + config.max_quad_expansion_ratio as f32, ]; let foo: Vec = dbscan_generic( @@ -812,11 +736,6 @@ pub fn combine_pseudospectra( info!("Combined pseudospectra: {}", foo.len()); timer.stop(true); - - if let Some(_stream) = record_stream.as_mut() { - warn!("Plotting pseudospectra is not implemented yet"); - } - foo } diff --git a/src/lib.rs b/src/lib.rs index d99977e..3ed1c6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,5 @@ pub mod aggregation; -mod mod_types; pub mod ms; pub mod scoring; pub mod space; pub mod utils; -pub mod visualization; diff --git a/src/main.rs b/src/main.rs index 074e09a..aaf97af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,13 +9,11 @@ mod aggregation; mod extraction; -mod mod_types; mod ms; mod scoring; mod space; mod utils; -mod 
visualization; extern crate log; extern crate pretty_env_logger; @@ -98,17 +96,12 @@ fn main() { pretty_env_logger::init(); - let mut rec: Option = None; - if cfg!(feature = "viz") { - rec = Some(visualization::setup_recorder()); - } - let path_use = args.files; if path_use.len() != 1 { panic!("I have only implemented one path!!!"); } let path_use = path_use[0].clone(); - // ms_denoise::read_all_ms1_denoising(path_use.clone(), &mut rec); + // ms_denoise::read_all_ms1_denoising(path_use.clone()); let out_path_dir = Path::new(&args.output_dir); // Create dir if not exists ... @@ -135,7 +128,6 @@ fn main() { let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising( path_use.clone(), config.denoise_config, - &mut rec, ); let cycle_time = dia_info.calculate_cycle_time(); @@ -145,7 +137,6 @@ fn main() { dia_frames, config.tracing_config, cycle_time, - &mut rec, ); let out = match out_traces_path { @@ -171,7 +162,6 @@ fn main() { let pseudoscans = aggregation::tracing::combine_pseudospectra( traces, config.pseudoscan_generation_config, - &mut rec, ); // Report min/max/average/std and skew for ims and rt diff --git a/src/mod_types.rs b/src/mod_types.rs deleted file mode 100644 index 4bc0982..0000000 --- a/src/mod_types.rs +++ /dev/null @@ -1,10 +0,0 @@ -// Floating-point precision is configured here -// https://users.rust-lang.org/t/generics-using-either-f32-or-f64/28647/3 -#[cfg(not(feature = "f64"))] -pub type Float = f32; -#[cfg(not(feature = "f64"))] -pub use std::f32 as floats; -#[cfg(feature = "f64")] -pub type Float = f64; -#[cfg(feature = "f64")] -pub use std::f64 as floats; diff --git a/src/ms/frames.rs b/src/ms/frames.rs index 91fe0f8..b2decda 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -5,10 +5,7 @@ pub use timsrust::{ ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, }; -use crate::mod_types::Float; use crate::ms::tdf::{DIAFrameInfo, ScanRange}; -use crate::space::space_generics::NDPoint; -use 
crate::visualization::RerunPlottable; use log::info; @@ -61,14 +58,12 @@ pub enum SortingOrder { /// - intensities. [123, 111, 12 , 3, 4, 1 ...] len = len(tof indices) /// - index 34 /// - rt 65.34 -/// Additions for FrameWindow: +/// Additions for FrameQuadWindow: /// - scan_start 123 // The scan number of the first scan offset in the current window. /// - group_id 1 // The group id of the current window. /// - quad_group_id 2 // The quad group id of the current window within the current group. #[derive(Debug, Clone)] -pub struct FrameWindow { - /// A vector of length (s) where contiguous elements represent - /// +pub struct FrameQuadWindow { pub scan_offsets: Vec, pub tof_indices: Vec, pub intensities: Vec, @@ -104,7 +99,7 @@ pub struct DenseFrameWindow { impl DenseFrameWindow { pub fn from_frame_window( - frame_window: FrameWindow, + frame_window: FrameQuadWindow, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, dia_info: &DIAFrameInfo, @@ -186,7 +181,7 @@ impl DenseFrame { } pub fn from_frame_window( - frame_window: FrameWindow, + frame_window: FrameQuadWindow, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, ) -> DenseFrame { @@ -270,134 +265,3 @@ impl DenseFrame { } pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); - -impl RerunPlottable> for DenseFrame { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - _required_extras: Option, - ) -> Result<(), Box> { - let rt = match log_time_in_seconds { - None => self.rt, - Some(log_time_in_seconds) => log_time_in_seconds as f64, - }; - rec.set_time_seconds("rt_seconds", rt); - - info!("Plotting frame {}::{}s into {}", self.index, rt, entry_path); - let min_mz = self - .raw_peaks - .iter() - .map(|peak| peak.mz) - .fold(f64::INFINITY, |a, b| a.min(b)); - let max_mz = self - .raw_peaks - .iter() - .map(|peak| peak.mz) - .fold(f64::NEG_INFINITY, |a, b| a.max(b)); - let min_mobility = self - 
.raw_peaks - .iter() - .map(|peak| peak.mobility as f64) - .fold(f64::INFINITY, |a, b| a.min(b)); - let max_mobility = self - .raw_peaks - .iter() - .map(|peak| peak.mobility as f64) - .fold(f64::NEG_INFINITY, |a, b| a.max(b)); - let num_points = self.raw_peaks.len(); - - info!( - "mz range: {:?}:{:?}, ims range {:?}:{:?}", - min_mz, max_mz, min_mobility, max_mobility - ); - info!("num points: {}", num_points); - let quad_points = self - .raw_peaks - .iter() - .map(|peak| NDPoint { - values: [(peak.mz / 10.) as Float, (100. * peak.mobility as Float)], - }) - .collect::>(); - - let max_intensity = self.raw_peaks.iter().map(|peak| peak.intensity).max(); - - let max_intensity: f32 = match max_intensity { - None => { - info!("No max intensity found for frame {}", self.index); - 0. - } - Some(max_intensity) => max_intensity as f32, - }; - - let radii = self - .raw_peaks - .iter() - .map(|peak| (peak.intensity as f32) / max_intensity) - .collect::>(); - - rec.log( - entry_path.as_str(), - &rerun::Points2D::new( - quad_points - .iter() - .map(|point| (point.values[0], point.values[1])), - ) - .with_radii(radii), - )?; - - Ok(()) - } -} - -impl RerunPlottable for Frame { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - required_extras: Converters, - ) -> Result<(), Box> { - let dense_frame = DenseFrame::new(self, &required_extras.0, &required_extras.1); - dense_frame.plot(rec, entry_path, log_time_in_seconds, None) - } -} - -impl RerunPlottable> for DenseFrameWindow { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - required_extras: Option, - ) -> Result<(), Box> { - let df = &self.frame; - df.plot(rec, entry_path, log_time_in_seconds, required_extras) - } -} - -impl RerunPlottable> for Vec { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - required_extras: Option, - ) -> Result<(), Box> { 
- let mut peaks = Vec::new(); - for dfw in self.iter() { - peaks.extend(dfw.frame.raw_peaks.clone()); - } - - let densepeak_plot = DenseFrame { - raw_peaks: peaks, - index: self[0].frame.index, - rt: self[0].frame.rt, - frame_type: FrameType::Unknown, - sorted: None, - }; - - densepeak_plot.plot(rec, entry_path, log_time_in_seconds, required_extras) - } -} diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index a420aca..4f718cd 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -3,7 +3,7 @@ use rusqlite::{Connection, Result}; use std::path::Path; use timsrust::{ConvertableIndex, Frame}; -use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameWindow}; +use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; // Diaframemsmsinfo = vec of frame_id -> windowgroup_id // diaframemsmswindows = vec[(windowgroup_id, scanstart, scanend, iso_mz, iso_with, nce)] @@ -60,6 +60,12 @@ pub struct DIAWindowGroup { pub scan_ranges: Vec, } +#[derive(Debug, Clone)] +pub enum GroupingLevel { + WindowGroup, // Technically this is the same as the frame level ... + QuadWindowGroup, +} + #[derive(Debug, Clone)] pub struct DIAFrameInfo { pub groups: Vec>, @@ -68,6 +74,7 @@ pub struct DIAFrameInfo { /// that the frame belongs to. pub frame_groups: Vec>, pub retention_times: Vec>, + pub grouping_level: GroupingLevel, } // TODO rename or split this ... 
since it is becoming more @@ -154,7 +161,7 @@ impl DIAFrameInfo { avg_cycle_time } - pub fn split_frame(&self, frame: Frame) -> Result, &'static str> { + pub fn split_frame(&self, frame: Frame) -> Result, &'static str> { let group = self.get_group(frame.index); if group.is_none() { return Err("Frame not in DIA group"); @@ -175,7 +182,7 @@ impl DIAFrameInfo { let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); let intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); - let frame_window = FrameWindow { + let frame_window = FrameQuadWindow { scan_offsets: scan_offsets_use .iter() .map(|x| (x - scan_start) as u64) @@ -462,7 +469,10 @@ pub fn read_dia_frame_info(dotd_file: String) -> Result { let mut groups_map_vec: Vec>> = (0..(max_window_id + 1)).map(|_| None).collect(); + + let mut num_scan_ranges = 0; for (group, scan_start, scan_end, iso_mz, iso_width, nce) in groups_vec { + num_scan_ranges += 1; let scan_range = ScanRange::new( scan_start, scan_end, @@ -484,6 +494,14 @@ pub fn read_dia_frame_info(dotd_file: String) -> Result { } } + let grouping_level = if num_scan_ranges > 200 { + log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diagonal PASEF?)"); + GroupingLevel::WindowGroup + } else { + log::info!("More than 200 scan ranges, using WindowGroup grouping level. 
(diaPASEF?)"); + GroupingLevel::QuadWindowGroup + }; + let mut groups_vec_o = (0..(max_window_id + 1)).map(|_| None).collect::>(); for (i, scan_ranges) in groups_map_vec.into_iter().enumerate() { let scan_ranges = match scan_ranges { @@ -501,6 +519,7 @@ pub fn read_dia_frame_info(dotd_file: String) -> Result { groups: groups_vec_o, frame_groups: ids_map_vec, retention_times: DIAFrameInfo::rts_from_tdf_connection(&conn)?, + grouping_level, }; Ok(frame_info) diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index 2ec13ff..e4ca69f 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -1,14 +1,12 @@ -use crate::mod_types::Float; use crate::space::space_generics::{IndexedPoints, NDBoundary, NDPoint}; use log::warn; -const EPSILON: Float = Float::EPSILON; // Implements a kdtree with several minor differences. #[derive(Debug, Clone)] pub struct RadiusKDTree<'a, T, const DIMENSIONALITY: usize> { boundary: NDBoundary, capacity: usize, - radius: Float, + radius: f32, points: Vec<(NDPoint, &'a T)>, high_split: Option>>, low_split: Option>>, @@ -17,7 +15,7 @@ pub struct RadiusKDTree<'a, T, const DIMENSIONALITY: usize> { // Since ranges are [closed, open) by convention, // I could think of the splits to be [low_bounds, division_value) // and [division_value, high_bounds). 
- division_value: Option, + division_value: Option, count: usize, level: usize, } @@ -28,7 +26,7 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { pub fn new_empty( boundary: NDBoundary, capacity: usize, - radius: Float, + radius: f32, ) -> RadiusKDTree<'a, T, D> { RadiusKDTree { boundary, @@ -88,7 +86,7 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { let _low_bounds = self.boundary.starts; let _high_bounds = self.boundary.ends; let mut longest_axis: Option = None; - let mut longest_axis_length: Option = None; + let mut longest_axis_length: Option = None; for i in 0..D { let axis_length = self.boundary.widths[i]; @@ -103,7 +101,7 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { let mut keep = false; for point in self.points.iter() { let diff = (point.0.values[i] - axis_val_first).abs(); - if diff > EPSILON { + if diff > f32::EPSILON { keep = true; break; } @@ -183,14 +181,14 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { .values .iter() .map(|x| x - self.radius) - .collect::>() + .collect::>() .try_into() .unwrap(), point .values .iter() .map(|x| x + self.radius) - .collect::>() + .collect::>() .try_into() .unwrap(), )); @@ -243,7 +241,7 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { .iter() .zip(point.values.iter()) .map(|(x, y)| (x - y).abs()) - .sum::(); + .sum::(); dist < self.radius }) .map(|x| x.1) diff --git a/src/space/quad.rs b/src/space/quad.rs index 89d097e..2d0b276 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -1,5 +1,3 @@ -use crate::mod_types::Float; - use crate::space::space_generics::{IndexedPoints, NDBoundary, NDPoint}; use core::panic; use log::trace; @@ -8,7 +6,7 @@ use log::trace; pub struct RadiusQuadTree<'a, T> { boundary: NDBoundary<2>, capacity: usize, - radius: Float, + radius: f32, points: Vec<(NDPoint<2>, &'a T)>, northeast: Option>>, northwest: Option>>, @@ -23,7 +21,7 @@ impl<'a, T> RadiusQuadTree<'a, T> { pub fn new_empty( boundary: NDBoundary<2>, capacity: usize, - radius: Float, + 
radius: f32, ) -> RadiusQuadTree<'a, T> { RadiusQuadTree { boundary, diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index dbcb5ee..b3c9094 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,18 +1,14 @@ -use crate::mod_types::Float; -// f32 or f64 depending on compilation - -const EPSILON: Float = Float::EPSILON; #[derive(Debug, Clone, Copy)] pub struct NDBoundary { - pub starts: [Float; DIMENSIONALITY], - pub ends: [Float; DIMENSIONALITY], - pub widths: [Float; DIMENSIONALITY], - pub centers: [Float; DIMENSIONALITY], + pub starts: [f32; DIMENSIONALITY], + pub ends: [f32; DIMENSIONALITY], + pub widths: [f32; DIMENSIONALITY], + pub centers: [f32; DIMENSIONALITY], } impl NDBoundary { - pub fn new(starts: [Float; D], ends: [Float; D]) -> NDBoundary { + pub fn new(starts: [f32; D], ends: [f32; D]) -> NDBoundary { let mut widths = [0.0; D]; let mut centers = [0.0; D]; for i in 0..D { @@ -56,16 +52,16 @@ impl NDBoundary { } pub fn from_ndpoints(points: &[NDPoint]) -> NDBoundary { - let mut starts = [Float::MAX; D]; - let mut ends = [Float::MIN; D]; + let mut starts = [f32::MAX; D]; + let mut ends = [f32::MIN; D]; for point in points.iter() { for i in 0..D { if point.values[i] < starts[i] { - starts[i] = point.values[i] - EPSILON; + starts[i] = point.values[i] - f32::EPSILON; } if point.values[i] > ends[i] { - ends[i] = point.values[i] + EPSILON; + ends[i] = point.values[i] + f32::EPSILON; } } } @@ -73,7 +69,7 @@ impl NDBoundary { NDBoundary::new(starts, ends) } - pub fn expand(&mut self, factors: &[Float; D]) { + pub fn expand(&mut self, factors: &[f32; D]) { for (i, ef) in factors.iter().enumerate() { let mut half_width = self.widths[i] / 2.0; let center = self.centers[i]; @@ -92,7 +88,7 @@ impl NDBoundary { // Oddly enough ... adding copy makes it slower ... 
#[derive(Debug, Clone)] pub struct NDPoint { - pub values: [Float; DIMENSIONALITY], + pub values: [f32; DIMENSIONALITY], } // Q: is there any instance where T is not usize? diff --git a/src/visualization.rs b/src/visualization.rs deleted file mode 100644 index aea8708..0000000 --- a/src/visualization.rs +++ /dev/null @@ -1,24 +0,0 @@ -pub trait RerunPlottable { - fn plot( - &self, - rec: &mut rerun::RecordingStream, - entry_path: String, - log_time_in_seconds: Option, - required_extras: T, - ) -> Result<(), Box>; -} - -// #[cfg(feature='viz')] -pub fn setup_recorder() -> rerun::RecordingStream { - let rec = rerun::RecordingStreamBuilder::new("rerun_jspp_denoiser").connect(); - match rec { - Ok(rec) => { - rec.set_time_seconds("rt_seconds", 0.0f64); - rec - } - Err(e) => { - // If the viz mode is on ... there has to be a viz... - panic!("Error setting up recorder: {:?}", e); - } - } -} From fc1c4859c22e2bbce76b22935319a28065f17032 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 23 Jun 2024 21:51:43 -0700 Subject: [PATCH 02/26] (wip,broken) migration to sqlx and alternative splitting detection --- Cargo.lock | 1211 +++++++++++++++++++++++++++++++- Cargo.toml | 6 +- README.md | 4 +- src/aggregation/aggregators.rs | 55 ++ src/aggregation/converters.rs | 30 + src/aggregation/dbscan.rs | 79 +-- src/aggregation/mod.rs | 2 + src/aggregation/tracing.rs | 5 +- src/main.rs | 170 +++-- src/ms/tdf.rs | 322 +++++---- 10 files changed, 1531 insertions(+), 353 deletions(-) create mode 100644 src/aggregation/aggregators.rs create mode 100644 src/aggregation/converters.rs diff --git a/Cargo.lock b/Cargo.lock index 9e73482..8f72f87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "addr2line" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" @@ -103,7 +112,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -113,7 +122,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -204,13 +213,22 @@ dependencies = [ "num", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -221,12 +239,33 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +[[package]] +name = "backtrace" +version = "0.3.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base64" version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "bitflags" version = "1.3.2" @@ -238,6 +277,18 @@ name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] [[package]] name = "brotli" @@ -310,7 +361,7 @@ dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets", + "windows-targets 0.52.5", ] [[package]] @@ -341,7 +392,7 @@ version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ - "heck", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.66", @@ -369,9 +420,15 @@ dependencies = [ "lazy_static", "libc", "unicode-width", - "windows-sys", + "windows-sys 0.52.0", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -392,12 +449,46 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -426,6 +517,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.20" @@ -438,6 +538,16 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "csv" version = "1.3.0" @@ -473,6 +583,29 @@ dependencies = [ "rayon", ] +[[package]] +name = "der" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + 
+[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + [[package]] name = "displaydoc" version = "0.1.7" @@ -484,11 +617,20 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "either" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +dependencies = [ + "serde", +] [[package]] name = "encode_unicode" @@ -515,6 +657,33 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -527,6 +696,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] 
+name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + [[package]] name = "flatbuffers" version = "23.5.26" @@ -547,12 +722,129 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -564,6 +856,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + 
[[package]] name = "half" version = "2.4.1" @@ -594,6 +892,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "heck" version = "0.5.0" @@ -609,6 +916,45 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "humantime" version = "1.3.0" @@ -641,6 +987,16 @@ dependencies = [ "cc", ] +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "indexmap" version = "2.2.6" @@ -687,17 +1043,19 @@ dependencies = [ "clap", "csv", "indicatif", + 
"libsqlite3-sys", "log", "num", "num-traits", "pretty_env_logger", "rand", "rayon", - "rusqlite", "sage-core", "serde", "serde_json", + "sqlx", "timsrust", + "tokio", "toml", ] @@ -745,6 +1103,9 @@ name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +dependencies = [ + "spin 0.5.2", +] [[package]] name = "lexical-core" @@ -843,6 +1204,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + [[package]] name = "lock_api" version = "0.4.12" @@ -879,6 +1246,16 @@ dependencies = [ "libc", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.2" @@ -894,6 +1271,12 @@ dependencies = [ "libc", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.3" @@ -903,6 +1286,44 @@ dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + 
"schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num" version = "0.4.3" @@ -928,14 +1349,31 @@ dependencies = [ ] [[package]] -name = "num-complex" -version = "0.4.6" +name = "num-bigint-dig" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" dependencies = [ - "num-traits", -] - + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -977,18 +1415,81 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "object" +version = "0.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "ordered-float" version = "2.10.1" @@ -998,6 +1499,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + [[package]] name = "parking_lot_core" version = "0.9.10" @@ -1006,9 +1517,9 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.1", "smallvec", - "windows-targets", + "windows-targets 0.52.5", ] [[package]] @@ -1048,6 +1559,54 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.30" @@ -1150,6 +1709,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.5.1" @@ -1188,6 +1756,26 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +[[package]] +name = "rsa" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rusqlite" version = "0.29.0" @@ -1202,6 +1790,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc_version" version = "0.4.0" @@ -1211,6 +1805,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "ryu" version = "1.0.18" @@ -1231,12 +1838,44 @@ dependencies = [ "serde", ] +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "security-framework" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +dependencies = [ + "bitflags 2.5.0", + "core-foundation", + "core-foundation-sys", + "libc", + 
"security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.23" @@ -1289,6 +1928,47 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -1301,18 +1981,275 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f895e3734318cc55f1fe66258926c9b910c124d47520339efecbb6c59cec7c1f" +dependencies = [ + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e50c216e3624ec8e7ecd14c6a6a6370aad6ee5d8cfc3ab30b5162eeeef2ed33" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d6753e460c998bbd4cd8c6f0ed9a64346fcca0723d6e75e52fdc351c5d2169d" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "dotenvy", + "either", + "event-listener", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashlink", + "hex", + "indexmap", + "log", + "memchr", + "native-tls", + "once_cell", + "paste", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror", + "tokio", + "tokio-stream", + "tracing", + "url", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a793bb3ba331ec8359c1853bd39eed32cdd7baaf22c35ccf5c92a7e8d1189ec" +dependencies = [ + 
"proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 1.0.109", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4ee1e104e00dedb6aa5ffdd1343107b0a4702e862a84320ee7cc74782d96fc" +dependencies = [ + "dotenvy", + "either", + "heck 0.4.1", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-sqlite", + "syn 1.0.109", + "tempfile", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "864b869fdf56263f4c95c45483191ea0af340f9f3e3e7b4d57a61c7c87a970db" +dependencies = [ + "atoi", + "base64", + "bitflags 2.5.0", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb7ae0e6a97fb3ba33b23ac2671a5ce6e3cabe003f451abd5a56e7951d975624" +dependencies = [ + "atoi", + "base64", + "bitflags 2.5.0", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand", + "serde", + "serde_json", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d59dc83cf45d89c555a577694534fcd1b55c545a816c816ce51f20bbe56a4f3f" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "sqlx-core", + "tracing", + "url", +] + [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0208408ba0c3df17ed26eb06992cb1a1268d41b2c0e12e65203fbe3972cee5" + [[package]] name = "syn" version = "1.0.109" @@ -1335,6 +2272,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1401,6 +2350,48 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "0.8.13" @@ -1435,6 +2426,38 @@ dependencies = [ "winnow", ] +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + [[package]] name = "twox-hash" version = "1.6.3" @@ -1445,18 +2468,68 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" 
+version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + [[package]] name = "unicode-width" version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "url" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + [[package]] name = "utf8parse" version = "0.2.2" @@ -1481,6 +2554,12 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasite" +version = 
"0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.92" @@ -1535,6 +2614,16 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "whoami" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +dependencies = [ + "redox_syscall 0.4.1", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1557,7 +2646,7 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1572,7 +2661,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", ] [[package]] @@ -1581,7 +2679,22 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + 
"windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -1590,28 +2703,46 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.5" @@ -1624,24 +2755,48 @@ version = "0.52.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.5" @@ -1677,6 +2832,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zstd" version = "0.12.4" diff --git a/Cargo.toml b/Cargo.toml index 
8bdcb58..58f8f65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,6 @@ license = "Apache-2.0" [dependencies] timsrust = "0.2.2" -rusqlite = { version = "0.29.0", features = ["bundled"] } # Serialization serde = { version = "1.0.193", features = ["derive"] } @@ -21,9 +20,12 @@ num = "0.4.1" num-traits = "0.2.18" clap = {version = "4.4.17", features = ["derive"]} csv = "1.3.0" - rand = "0.8.5" +sqlx = { version = "0.7.2", features = ["runtime-tokio-native-tls", "sqlite"]} +libsqlite3-sys = "^0.26.0" +tokio = {version="1.38.0", features = ["rt", "rt-multi-thread"]} + # Sage sage-core = { git = "https://github.com/lazear/sage.git", rev = "9e870429889b341c4773df32b65e553283301a93" } toml = "0.8.8" diff --git a/README.md b/README.md index 2f545c8..3dc5b37 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,8 @@ There are a couple of features for development. ### Env variables ``` RUST_LOG=info # will change the log level ... levels are standard (info, debug, warn, error, trace) -DEBUG_TRACES_FROM_CACHE=1 # If set and non empty will load the traces from the cache. -# It will skip the generation of the traces and will read the file specified on the config. (handy when optimizing the pseudospectra generation) +RUST_BACKTRACE=1 # will show a backtrace on panic +RAYON_NUM_THREADS=4 # will set the number of threads to use in rayon ``` ## Roadmap diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs new file mode 100644 index 0000000..740f260 --- /dev/null +++ b/src/aggregation/aggregators.rs @@ -0,0 +1,55 @@ + +use crate::ms::frames::TimsPeak; + +/// A trait for aggregating points into a single point. +/// This is used for the final step of dbscan. +/// +/// Types are: +/// T: The type of the points to be aggregated. +/// R: The type of the aggregated point. +/// S: The type of the aggregator. 
+/// +pub trait ClusterAggregator { + fn add(&mut self, elem: &T); + fn aggregate(&self) -> R; + fn combine(self, other: Self) -> Self; +} + +#[derive(Default, Debug)] +pub struct TimsPeakAggregator { + pub cluster_intensity: u64, + pub cluster_mz: f64, + pub cluster_mobility: f64, + pub num_peaks: u64, +} + +impl ClusterAggregator for TimsPeakAggregator { + fn add(&mut self, elem: &TimsPeak) { + let f64_intensity = elem.intensity as f64; + debug_assert!((elem.intensity as u64) < (u64::MAX - self.cluster_intensity)); + self.cluster_intensity += elem.intensity as u64; + self.cluster_mz += elem.mz * f64_intensity; + self.cluster_mobility += (elem.mobility as f64) * f64_intensity; + self.num_peaks += 1; + } + + fn aggregate(&self) -> TimsPeak { + let cluster_mz = self.cluster_mz / self.cluster_intensity as f64; + let cluster_mobility = self.cluster_mobility / self.cluster_intensity as f64; + TimsPeak { + intensity: self.cluster_intensity as u32, + mz: cluster_mz, + mobility: cluster_mobility as f32, + npeaks: self.num_peaks as u32, + } + } + + fn combine(self, other: Self) -> Self { + Self { + cluster_intensity: self.cluster_intensity + other.cluster_intensity, + cluster_mz: self.cluster_mz + other.cluster_mz, + cluster_mobility: self.cluster_mobility + other.cluster_mobility, + num_peaks: self.num_peaks + other.num_peaks, + } + } +} diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs new file mode 100644 index 0000000..0703dfd --- /dev/null +++ b/src/aggregation/converters.rs @@ -0,0 +1,30 @@ + +use crate::ms::frames::TimsPeak; +use crate::space::space_generics::NDPointConverter; +use crate::space::space_generics::NDPoint; + +// https://github.com/rust-lang/rust/issues/35121 +// The never type is not stable yet.... 
+pub struct BypassDenseFrameBackConverter {} + +impl NDPointConverter for BypassDenseFrameBackConverter { + fn convert(&self, _elem: &TimsPeak) -> NDPoint<2> { + panic!("This should never be called") + } +} + +pub struct DenseFrameConverter { + pub mz_scaling: f64, + pub ims_scaling: f32, +} + +impl NDPointConverter for DenseFrameConverter { + fn convert(&self, elem: &TimsPeak) -> NDPoint<2> { + NDPoint { + values: [ + (elem.mz / self.mz_scaling) as f32, + (elem.mobility / self.ims_scaling) as f32, + ], + } + } +} diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index ab8b407..f936e60 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -14,6 +14,8 @@ use crate::utils; /// 1. Intensity usage. /// use crate::ms::frames; +use crate::aggregation::aggregators::{ClusterAggregator, TimsPeakAggregator}; +use crate::aggregation::converters::{DenseFrameConverter, BypassDenseFrameBackConverter}; use crate::space::space_generics::{HasIntensity, IndexedPoints, NDPoint}; use indicatif::ProgressIterator; use log::{debug, info, trace}; @@ -353,58 +355,6 @@ fn _dbscan< (cluster_id, cluster_labels) } -/// A trait for aggregating points into a single point. -/// This is used for the final step of dbscan. -/// -/// Types are: -/// T: The type of the points to be aggregated. -/// R: The type of the aggregated point. -/// S: The type of the aggregator. 
-/// -pub trait ClusterAggregator { - fn add(&mut self, elem: &T); - fn aggregate(&self) -> R; - fn combine(self, other: Self) -> Self; -} - -#[derive(Default, Debug)] -struct TimsPeakAggregator { - cluster_intensity: u64, - cluster_mz: f64, - cluster_mobility: f64, - num_peaks: u64, -} - -impl ClusterAggregator for TimsPeakAggregator { - fn add(&mut self, elem: &TimsPeak) { - let f64_intensity = elem.intensity as f64; - debug_assert!((elem.intensity as u64) < (u64::MAX - self.cluster_intensity)); - self.cluster_intensity += elem.intensity as u64; - self.cluster_mz += elem.mz * f64_intensity; - self.cluster_mobility += (elem.mobility as f64) * f64_intensity; - self.num_peaks += 1; - } - - fn aggregate(&self) -> TimsPeak { - let cluster_mz = self.cluster_mz / self.cluster_intensity as f64; - let cluster_mobility = self.cluster_mobility / self.cluster_intensity as f64; - frames::TimsPeak { - intensity: self.cluster_intensity as u32, - mz: cluster_mz, - mobility: cluster_mobility as f32, - npeaks: self.num_peaks as u32, - } - } - - fn combine(self, other: Self) -> Self { - Self { - cluster_intensity: self.cluster_intensity + other.cluster_intensity, - cluster_mz: self.cluster_mz + other.cluster_mz, - cluster_mobility: self.cluster_mobility + other.cluster_mobility, - num_peaks: self.num_peaks + other.num_peaks, - } - } -} fn _inner, R>( chunk: &[(usize, T)], @@ -721,31 +671,6 @@ pub fn dbscan_generic< } } -// https://github.com/rust-lang/rust/issues/35121 -// The never type is not stable yet.... 
-struct BypassDenseFrameBackConverter {} - -impl NDPointConverter for BypassDenseFrameBackConverter { - fn convert(&self, _elem: &frames::TimsPeak) -> NDPoint<2> { - panic!("This should never be called") - } -} - -struct DenseFrameConverter { - mz_scaling: f64, - ims_scaling: f32, -} - -impl NDPointConverter for DenseFrameConverter { - fn convert(&self, elem: &TimsPeak) -> NDPoint<2> { - NDPoint { - values: [ - (elem.mz / self.mz_scaling) as f32, - (elem.mobility / self.ims_scaling) as f32, - ], - } - } -} type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; // bool> diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 4a6cae1..04e8dc4 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -1,4 +1,6 @@ pub mod dbscan; pub mod ms_denoise; +pub mod converters; +pub mod aggregators; pub mod tracing; pub mod chromatograms; diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index fff6214..6084041 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,5 +1,6 @@ -use crate::aggregation::dbscan::{dbscan_generic, ClusterAggregator}; -use crate::ms::frames::{DenseFrame, DenseFrameWindow, TimsPeak}; +use crate::aggregation::dbscan::dbscan_generic; +use crate::aggregation::aggregators::ClusterAggregator; +use crate::ms::frames::DenseFrameWindow; use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike}; use crate::utils; use crate::utils::RollingSDCalculator; diff --git a/src/main.rs b/src/main.rs index aaf97af..0fc6362 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,103 +114,91 @@ fn main() { let out_traces_path = config.output_config.debug_traces_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); let out_path_features = config.output_config.out_features_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); - let mut traces_from_cache = env::var("DEBUG_TRACES_FROM_CACHE").is_ok(); - if traces_from_cache && out_path_scans.is_none() { - 
log::warn!("DEBUG_TRACES_FROM_CACHE is set but no output path is set, will fall back to generating traces."); - traces_from_cache = false; - } + log::info!("Reading DIA data from: {}", path_use); + let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising( + path_use.clone(), + config.denoise_config, + ); - let mut pseudoscans = if traces_from_cache { - let pseudoscans_read = aggregation::tracing::read_pseudoscans_json(out_path_scans.unwrap()); - pseudoscans_read.unwrap() - } else { - log::info!("Reading DIA data from: {}", path_use); - let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising( - path_use.clone(), - config.denoise_config, - ); - - let cycle_time = dia_info.calculate_cycle_time(); - - // TODO add here expansion limits - let mut traces = aggregation::tracing::combine_traces( - dia_frames, - config.tracing_config, - cycle_time, - ); - - let out = match out_traces_path { - Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), - None => Ok(()), - }; - match out { - Ok(_) => {} - Err(e) => { - log::warn!("Error writing traces: {:?}", e); - } - } + let cycle_time = dia_info.calculate_cycle_time(); - println!("traces: {:?}", traces.len()); - traces.retain(|x| x.num_agg > 5); - println!("traces: {:?}", traces.len()); - if traces.len() > 5 { - println!("sample_trace: {:?}", traces[traces.len() - 4]) - } + // TODO add here expansion limits + let mut traces = aggregation::tracing::combine_traces( + dia_frames, + config.tracing_config, + cycle_time, + ); - // Maybe reparametrize as 1.1 cycle time - // TODO add here expansion limits - let pseudoscans = aggregation::tracing::combine_pseudospectra( - traces, - config.pseudoscan_generation_config, - ); - - // Report min/max/average/std and skew for ims and rt - // This can probably be a macro ... 
- let ims_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); - let ims_sd_stats = utils::get_stats( - &pseudoscans - .iter() - .map(|x| x.ims_std as f64) - .collect::>(), - ); - let rt_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); - let rt_sd_stats = utils::get_stats( - &pseudoscans - .iter() - .map(|x| x.rt_std as f64) - .collect::>(), - ); - let npeaks = utils::get_stats( - &pseudoscans - .iter() - .map(|x| x.peaks.len() as f64) - .collect::>(), - ); - - println!("ims_stats: {:?}", ims_stats); - println!("rt_stats: {:?}", rt_stats); - - println!("ims_sd_stats: {:?}", ims_sd_stats); - println!("rt_sd_stats: {:?}", rt_sd_stats); - - println!("npeaks: {:?}", npeaks); - - let out = match out_path_scans { - Some(out_path) => aggregation::tracing::write_pseudoscans_json(&pseudoscans, out_path), - None => Ok(()), - }; - - match out { - Ok(_) => {} - Err(e) => { - log::warn!("Error writing pseudoscans: {:?}", e); - } + let out = match out_traces_path { + Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), + None => Ok(()), + }; + match out { + Ok(_) => {} + Err(e) => { + log::warn!("Error writing traces: {:?}", e); } - pseudoscans + } + + println!("traces: {:?}", traces.len()); + traces.retain(|x| x.num_agg > 5); + println!("traces: {:?}", traces.len()); + if traces.len() > 5 { + println!("sample_trace: {:?}", traces[traces.len() - 4]) + } + + // Maybe reparametrize as 1.1 cycle time + // TODO add here expansion limits + let mut pseudoscans = aggregation::tracing::combine_pseudospectra( + traces, + config.pseudoscan_generation_config, + ); + + // Report min/max/average/std and skew for ims and rt + // This can probably be a macro ... 
+ let ims_stats = + utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); + let ims_sd_stats = utils::get_stats( + &pseudoscans + .iter() + .map(|x| x.ims_std as f64) + .collect::>(), + ); + let rt_stats = + utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); + let rt_sd_stats = utils::get_stats( + &pseudoscans + .iter() + .map(|x| x.rt_std as f64) + .collect::>(), + ); + let npeaks = utils::get_stats( + &pseudoscans + .iter() + .map(|x| x.peaks.len() as f64) + .collect::>(), + ); + + println!("ims_stats: {:?}", ims_stats); + println!("rt_stats: {:?}", rt_stats); + + println!("ims_sd_stats: {:?}", ims_sd_stats); + println!("rt_sd_stats: {:?}", rt_sd_stats); + + println!("npeaks: {:?}", npeaks); + + let out = match out_path_scans { + Some(out_path) => aggregation::tracing::write_pseudoscans_json(&pseudoscans, out_path), + None => Ok(()), }; + match out { + Ok(_) => {} + Err(e) => { + log::warn!("Error writing pseudoscans: {:?}", e); + } + } + println!("pseudoscans: {:?}", pseudoscans.len()); pseudoscans.retain(|x| x.peaks.len() > 5); diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 4f718cd..df272f8 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,7 +1,12 @@ use log::{debug, error, info, trace}; -use rusqlite::{Connection, Result}; -use std::path::Path; + +use sqlx::Pool; +use std::path::{Path, PathBuf}; use timsrust::{ConvertableIndex, Frame}; +use sqlx::{Row, Sqlite, SqlitePool,FromRow}; +use tokio; +use tokio::runtime::Runtime; + use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; @@ -10,6 +15,7 @@ use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; #[derive(Debug, Clone)] pub struct ScanRange { + pub id: usize, pub scan_start: usize, pub scan_end: usize, pub iso_mz: f32, @@ -23,6 +29,7 @@ pub struct ScanRange { impl ScanRange { pub fn new( + id: usize, scan_start: usize, scan_end: usize, iso_mz: f32, @@ -41,6 +48,7 @@ impl ScanRange { let iso_high = iso_mz + iso_width / 2.0; Self 
{ + id, scan_start, scan_end, iso_mz, @@ -81,43 +89,30 @@ pub struct DIAFrameInfo { // of a splitter than a frame info reader. // Maybe a builder -> splitter pattern? impl DIAFrameInfo { - pub fn get_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { + pub fn get_dia_frame_window_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { let group_id = self.frame_groups[frame_id]; - - match group_id { - None => None, - Some(group_id) => self.groups[group_id].as_ref(), + if group_id.is_none() { + return None; } + self.groups[group_id.unwrap()].as_ref() } - fn rts_from_tdf_connection(conn: &Connection) -> Result>> { + + async fn rts_from_tdf_connection(conn: &Pool) -> Result>, sqlx::Error> { // To calculate cycle time -> // DiaFrameMsMsInfo -> Get the frames that match a specific id (one for each ...) // Frames -> SELECT id, time FROM Frames -> make a Vec>, map the former // framer id list (no value should be None). // Scan diff the new vec! - let mut stmt = conn.prepare("SELECT Id, Time FROM Frames")?; - let mut times = Vec::new(); - let res = stmt.query_map([], |row| { - let id: usize = row.get(0)?; - let time: f32 = row.get(1)?; - Ok((id, time)) - }); - - match res { - Ok(x) => { - for y in x { - let (id, time) = y.unwrap(); - times.resize(id + 1, None); - times[id] = Some(time); - } - } - Err(e) => { - error!("Error reading Frames: {}", e); - } + let results:Vec<(i32, f32)> = sqlx::query_as("SELECT Id, Time FROM Frames").fetch_all(conn).await?; + let mut retention_times = Vec::new(); + for row in results.iter() { + let id: usize = row.0 as usize; + let time: f32 = row.1; + retention_times.resize(id + 1, None); + retention_times[id] = Some(time); } - - Ok(times) + Ok(retention_times) } pub fn calculate_cycle_time(&self) -> f32 { @@ -203,12 +198,13 @@ impl DIAFrameInfo { Ok(out_frames) } - pub fn split_dense_frame(&self, mut denseframe: DenseFrame) -> Result> { - let group = self.get_group(denseframe.index); - // if group.is_none() { - // return Err("Frame 
not in DIA group".into()); - // } - let group = group.unwrap(); + + pub fn split_frames() { + + } + + pub fn split_dense_frame(&self, mut denseframe: DenseFrame) -> Vec { + let group = self.get_dia_frame_window_group(denseframe.index).expect("Frame not in DIA group"); // Steps // 1. Sort by ims @@ -263,7 +259,7 @@ impl DIAFrameInfo { frames.push(frame_window); } - Ok(frames) + frames } /// Returns a vector of length equal to the number of groups. @@ -271,7 +267,7 @@ impl DIAFrameInfo { fn bundle_by_group(&self, frames: Vec) -> Vec> { let mut frame_groups = Vec::new(); for frame in frames { - let group = self.get_group(frame.index); + let group = self.get_dia_frame_window_group(frame.index); if group.is_none() { continue; } @@ -321,15 +317,8 @@ impl DIAFrameInfo { info!("Processing group {}", i); for frame in frame_bundle { let frame_windows = self.split_dense_frame(frame); - match frame_windows { - Ok(frame_windows) => { - for frame_window in frame_windows { - out[i][frame_window.quad_group_id].push(frame_window); - } - } - Err(e) => { - error!("Error splitting frame: {}", e); - } + for frame_window in frame_windows { + out[i][frame_window.quad_group_id].push(frame_window); } } } @@ -390,137 +379,162 @@ impl DIAFrameInfo { // FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) // ) WITHOUT ROWID -// TODO refactor this to make it a constructor method ... -pub fn read_dia_frame_info(dotd_file: String) -> Result { - let reader = timsrust::FileReader::new(dotd_file.clone()).unwrap(); - let scan_converter = reader.get_scan_converter().unwrap(); - // Find an 'analysis.tdf' file inside the dotd file (directory). 
- let tdf_path = Path::new(dotd_file.as_str()).join("analysis.tdf"); +#[derive(Clone, FromRow, Debug)] +pub struct DiaFrameMsMsWindowInfo { + pub window_group: i32, + pub scan_num_begin: i32, + pub scan_num_end: i32, + pub isolation_mz: f32, + pub isolation_width: f32, + pub collision_energy: f32, +} - info!("tdf_path: {:?}", tdf_path); - let conn = Connection::open(tdf_path)?; +impl DiaFrameMsMsWindowInfo { + fn into_scan_range(&self, id: usize, scan_converter: &timsrust::Scan2ImConverter) -> ScanRange { + ScanRange::new( + id, + self.scan_num_begin as usize, + self.scan_num_end as usize, + self.isolation_mz, + self.isolation_width, + self.collision_energy, + scan_converter, + ) + } +} - let mut stmt_ids = conn.prepare("SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo")?; +struct FrameInfoBuilder { + pub tdf_path: String, + pub scan_converter: timsrust::Scan2ImConverter, +} - let mut ids_vec: Vec<(usize, usize)> = Vec::new(); - let res = stmt_ids.query_map([], |row| { - let id: usize = row.get(0)?; - let group: usize = row.get(1)?; - Ok((id, group)) - }); +impl FrameInfoBuilder { + pub fn from_dotd_path(dotd_path: String) -> Self { + let reader = timsrust::FileReader::new(dotd_path.clone()).unwrap(); + let scan_converter = reader.get_scan_converter().unwrap(); - match res { - Ok(x) => { - for id_group in x { - ids_vec.push(id_group.unwrap()); - } - } - Err(e) => { - error!("Error reading DiaFrameMsMsInfo: {}", e); - } + // Find an 'analysis.tdf' file inside the dotd file (directory). 
+ let tdf_path = Path::new(dotd_path.as_str()).join("analysis.tdf").into_os_string().into_string().unwrap(); + info!("tdf_path: {:?}", tdf_path); + Self { tdf_path, scan_converter } } - let max_id = ids_vec.iter().map(|(id, _)| id).max().unwrap(); - let mut ids_map_vec = vec![None; max_id + 1]; - for (id, group) in ids_vec { - ids_map_vec[id] = Some(group); + pub fn build(&self) -> Result { + let mut rt = Runtime::new().unwrap(); + + rt.block_on(async { + self.build_async().await + }) } - let mut stmt_groups = conn.prepare( - "SELECT - WindowGroup, - ScanNumBegin, - ScanNumEnd, - IsolationMz, - IsolationWidth, - CollisionEnergy - FROM DiaFrameMsMsWindows", - )?; - let mut groups_vec: Vec<(usize, usize, usize, f32, f32, f32)> = Vec::new(); - let res = stmt_groups.query_map([], |row| { - Ok(( - row.get(0)?, - row.get(1)?, - row.get(2)?, - row.get(3)?, - row.get(4)?, - row.get(5)?, - )) - }); - - match res { - Ok(x) => { - for group in x { - groups_vec.push(group.unwrap()); + async fn build_async(&self) -> Result { + let db = SqlitePool::connect(&self.tdf_path).await?; + + // This vec maps frame_id -> window_group_id + let frame_info = self.get_frame_mapping(&db).await?; + + // This vec maps window_group_id -> Vec + // And also returns the grouping level. 
+ let (group_mapping, grouping_level) = self.get_frame_windows(&db).await?; + + let max_window_id = group_mapping.len() - 1; + + let mut groups_vec_o = (0..(max_window_id + 1)).map(|_| None).collect::>(); + for (i, scan_ranges) in group_mapping.into_iter().enumerate() { + let scan_ranges = match scan_ranges { + None => continue, + Some(scan_ranges) => scan_ranges, + }; + if scan_ranges.is_empty() { + continue; + } else { + groups_vec_o[i] = Some(DIAWindowGroup { id: i, scan_ranges }); } } - Err(e) => { - error!("Error reading DiaFrameMsMsWindows: {}", e); - } - } - let max_window_id = groups_vec - .iter() - .map(|(id, _, _, _, _, _)| *id) - .max() - .unwrap(); + let frame_info = DIAFrameInfo { + groups: groups_vec_o, + frame_groups: frame_info, + retention_times: DIAFrameInfo::rts_from_tdf_connection(&db).await?, + grouping_level, + }; - let mut groups_map_vec: Vec>> = - (0..(max_window_id + 1)).map(|_| None).collect(); + Ok(frame_info) + } - let mut num_scan_ranges = 0; - for (group, scan_start, scan_end, iso_mz, iso_width, nce) in groups_vec { - num_scan_ranges += 1; - let scan_range = ScanRange::new( - scan_start, - scan_end, - iso_mz, - iso_width, - nce, - &scan_converter, - ); + async fn get_frame_mapping(&self, db: &Pool) -> Result>, sqlx::Error>{ + let result: Vec<(i32, i32)> = sqlx::query_as( + "SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo;", + ) + .fetch_all(db).await?; - if groups_map_vec[group].is_none() { - groups_map_vec[group] = Some(Vec::new()); - } + let frame_info = result.iter().map(|(id, group)| (*id as usize, *group as usize)).collect::>(); - match &mut groups_map_vec[group] { - None => continue, - Some(scan_ranges) => { - scan_ranges.push(scan_range); - } + let max_id = frame_info.iter().map(|(id, _)| id).max().unwrap(); + let mut ids_map_vec = vec![None; max_id + 1]; + for (id, group) in frame_info { + ids_map_vec[id] = Some(group); } + + Ok(ids_map_vec) } - let grouping_level = if num_scan_ranges > 200 { - log::info!("More than 200 scan 
ranges, using WindowGroup grouping level. (diagonal PASEF?)"); - GroupingLevel::WindowGroup - } else { - log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diaPASEF?)"); - GroupingLevel::QuadWindowGroup - }; - - let mut groups_vec_o = (0..(max_window_id + 1)).map(|_| None).collect::>(); - for (i, scan_ranges) in groups_map_vec.into_iter().enumerate() { - let scan_ranges = match scan_ranges { - None => continue, - Some(scan_ranges) => scan_ranges, - }; - if scan_ranges.is_empty() { - continue; + async fn get_frame_windows(&self, db: &Pool) -> Result<(Vec>>, GroupingLevel), sqlx::Error> { + let result: Vec = sqlx::query_as::<_, DiaFrameMsMsWindowInfo>( + "SELECT + WindowGroup, + ScanNumBegin, + ScanNumEnd, + IsolationMz, + IsolationWidth, + CollisionEnergy + FROM DiaFrameMsMsWindows", + ) + .fetch_all(db).await.unwrap(); + + let grouping_level = if result.len() > 200 { + log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diagonal PASEF?)"); + GroupingLevel::WindowGroup } else { - groups_vec_o[i] = Some(DIAWindowGroup { id: i, scan_ranges }); + log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diaPASEF?)"); + GroupingLevel::QuadWindowGroup + }; + + let max_window_id: usize = result + .iter() + .map(|window| window.window_group) + .max() + .unwrap() as usize; + + let mut group_map_vec: Vec>> = vec![None; max_window_id + 1]; + + let mut scangroup_id = 0; + for window in result { + // TODO this is maybe a good place to make the trouping ... + // If its diapasef, the groups are quad+window groups. + // If its diagonal, the groups are only window groups. 
+ let usize_wg = window.window_group as usize; + if group_map_vec[usize_wg].is_none() { + group_map_vec[usize_wg] = Some(Vec::new()); + } + + match &mut group_map_vec[usize_wg] { + None => continue, + Some(scan_ranges) => { + scan_ranges.push(window.into_scan_range(scangroup_id.clone(), &self.scan_converter)); + scangroup_id += 1; + } + } } + Ok((group_map_vec, grouping_level)) } - let frame_info = DIAFrameInfo { - groups: groups_vec_o, - frame_groups: ids_map_vec, - retention_times: DIAFrameInfo::rts_from_tdf_connection(&conn)?, - grouping_level, - }; +} - Ok(frame_info) +// TODO refactor this to make it a constructor method ... +pub fn read_dia_frame_info(dotd_file: String) -> Result { + let builder = FrameInfoBuilder::from_dotd_path(dotd_file); + builder.build() } From c89cc6ce03ffe2f094d32b6b65b6e412677faf49 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Mon, 24 Jun 2024 07:35:22 -0700 Subject: [PATCH 03/26] (wip) changed frame breaking order and work towards new denoising logic --- src/aggregation/aggregators.rs | 167 +++++++++++++++++++- src/aggregation/converters.rs | 2 +- src/aggregation/dbscan.rs | 281 +++++++-------------------------- src/aggregation/ms_denoise.rs | 142 ++++++++++++----- src/aggregation/tracing.rs | 24 +-- src/extraction.rs | 13 -- src/main.rs | 2 - src/ms/frames.rs | 4 +- src/ms/tdf.rs | 148 +++++++++++------ 9 files changed, 446 insertions(+), 337 deletions(-) delete mode 100644 src/extraction.rs diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 740f260..fd8e8bd 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,5 +1,19 @@ - use crate::ms::frames::TimsPeak; +use crate::space::space_generics::HasIntensity; +use crate::utils; +use num::cast::AsPrimitive; +use std::ops::{Add, Div, Mul, Sub}; + +use rayon::prelude::*; + +// I Dont really like having this here but I am not sure where else to +// define it ... 
since its needed by the aggregation functions +#[derive(Debug, PartialEq, Clone)] +pub enum ClusterLabel { + Unassigned, + Noise, + Cluster(T), +} /// A trait for aggregating points into a single point. /// This is used for the final step of dbscan. @@ -53,3 +67,154 @@ impl ClusterAggregator for TimsPeakAggregator { } } } + +pub fn aggregate_clusters< + T: HasIntensity + Send + Clone + Copy, + G: Sync + Send + ClusterAggregator, + R: Send, + F: Fn() -> G + Send + Sync, + Z: AsPrimitive + + Send + + Sync + + Add + + PartialOrd + + Div + + Mul + + Default + + Sub, +>( + tot_clusters: u64, + cluster_labels: Vec>, + elements: &[T], + def_aggregator: &F, + log_level: utils::LogLevel, + keep_unclustered: bool, +) -> Vec { + let cluster_vecs: Vec = if cfg!(feature = "par_dataprep") { + let mut timer = + utils::ContextTimer::new("dbscan_generic::par_aggregation", true, log_level); + let out: Vec<(usize, T)> = cluster_labels + .iter() + .enumerate() + .filter_map(|(point_index, x)| match x { + ClusterLabel::Cluster(cluster_id) => { + let cluster_idx = *cluster_id as usize - 1; + let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); + tmp + } + _ => None, + }) + .collect(); + + let run_closure = + |chunk: Vec<(usize, T)>| _inner(&chunk, tot_clusters as usize, &def_aggregator); + let chunk_size = (out.len() / rayon::current_num_threads()) / 2; + let chunk_size = chunk_size.max(1); + let out2 = out + .into_par_iter() + .chunks(chunk_size) + .map(run_closure) + .reduce(Vec::new, |l, r| { + if l.is_empty() { + r + } else { + l.into_iter() + .zip(r) + .map(|(l, r)| match (l, r) { + (Some(l), Some(r)) => { + let o = l.combine(r); + Some(o) + } + (Some(l), None) => Some(l), + (None, Some(r)) => Some(r), + (None, None) => None, + }) + .collect::>() + } + }); + + let mut cluster_vecs = out2.into_iter().flatten().collect::>(); + + let unclustered_elems: Vec = cluster_labels + .iter() + .enumerate() + .filter(|(_, x)| match x { + ClusterLabel::Unassigned => true, + 
ClusterLabel::Noise => keep_unclustered, + _ => false, + }) + .map(|(i, _elem)| i) + .collect(); + + // if unclustered_elems.len() > 0 { + // log::debug!("Total Orig elems: {}", cluster_labels.len()); + // log::debug!("Unclustered elems: {}", unclustered_elems.len()); + // log::debug!("Clustered elems: {}", cluster_vecs.len()); + // } + + let unclustered_elems = unclustered_elems + .iter() + .map(|i| { + let mut oe = def_aggregator(); + oe.add(&elements[*i]); + oe + }) + .collect::>(); + + cluster_vecs.extend(unclustered_elems); + + timer.stop(true); + cluster_vecs + } else { + let mut cluster_vecs: Vec = Vec::with_capacity(tot_clusters as usize); + let mut unclustered_points: Vec = Vec::new(); + for _ in 0..tot_clusters { + cluster_vecs.push(def_aggregator()); + } + for (point_index, cluster_label) in cluster_labels.iter().enumerate() { + match cluster_label { + ClusterLabel::Cluster(cluster_id) => { + let cluster_idx = *cluster_id as usize - 1; + cluster_vecs[cluster_idx].add(&(elements[point_index])); + } + ClusterLabel::Noise => { + if keep_unclustered { + let mut oe = def_aggregator(); + oe.add(&elements[point_index]); + unclustered_points.push(oe); + } + } + _ => {} + } + } + cluster_vecs.extend(unclustered_points); + cluster_vecs + }; + + let mut timer = + utils::ContextTimer::new("dbscan_generic::aggregation", true, utils::LogLevel::TRACE); + let out = cluster_vecs + .par_iter() + .map(|cluster| cluster.aggregate()) + .collect::>(); + timer.stop(true); + + out +} + +fn _inner, R>( + chunk: &[(usize, T)], + max_cluster_id: usize, + def_aggregator: &dyn Fn() -> G, +) -> Vec> { + let mut cluster_vecs: Vec> = (0..max_cluster_id).map(|_| None).collect(); + + for (cluster_idx, point) in chunk { + if cluster_vecs[*cluster_idx].is_none() { + cluster_vecs[*cluster_idx] = Some(def_aggregator()); + } + cluster_vecs[*cluster_idx].as_mut().unwrap().add(point); + } + + cluster_vecs +} diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs index 
0703dfd..ebffcc4 100644 --- a/src/aggregation/converters.rs +++ b/src/aggregation/converters.rs @@ -23,7 +23,7 @@ impl NDPointConverter for DenseFrameConverter { NDPoint { values: [ (elem.mz / self.mz_scaling) as f32, - (elem.mobility / self.ims_scaling) as f32, + (elem.mobility / self.ims_scaling), ], } } diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index f936e60..82ce70b 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -3,19 +3,14 @@ use std::ops::{Add, Div, Mul, Sub}; use crate::ms::frames::TimsPeak; use crate::space::space_generics::NDPointConverter; -use crate::utils::within_distance_apply; use crate::utils; +use crate::utils::within_distance_apply; -/// Density-based spatial clustering of applications with noise (DBSCAN) -/// -/// This module implements a variant of dbscan with a couple of modifications -/// with respect to the vanilla implementation. -/// -/// 1. Intensity usage. -/// +use crate::aggregation::aggregators::{ + aggregate_clusters, ClusterAggregator, ClusterLabel, TimsPeakAggregator, +}; +use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; use crate::ms::frames; -use crate::aggregation::aggregators::{ClusterAggregator, TimsPeakAggregator}; -use crate::aggregation::converters::{DenseFrameConverter, BypassDenseFrameBackConverter}; use crate::space::space_generics::{HasIntensity, IndexedPoints, NDPoint}; use indicatif::ProgressIterator; use log::{debug, info, trace}; @@ -26,44 +21,42 @@ use crate::space::kdtree::RadiusKDTree; use num::cast::AsPrimitive; -// Pseudocode from wikipedia. -// Donate to wikipedia y'all. :3 +/// Density-based spatial clustering of applications with noise (DBSCAN) +/// +/// This module implements a variant of dbscan with a couple of modifications +/// with respect to the vanilla implementation. +/// +/// Pseudocode from wikipedia. +/// Donate to wikipedia y'all. 
:3 // -// DBSCAN(DB, distFunc, eps, minPts) { -// C := 0 /* Cluster counter */ -// for each point P in database DB { -// if label(P) ≠ undefined then continue /* Previously processed in inner loop */ -// Neighbors N := RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ -// if |N| < minPts then { /* Density check */ -// label(P) := Noise /* Label as Noise */ -// continue -// } -// C := C + 1 /* next cluster label */ -// label(P) := C /* Label initial point */ -// SeedSet S := N \ {P} /* Neighbors to expand */ -// for each point Q in S { /* Process every seed point Q */ -// if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ -// if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ -// label(Q) := C /* Label neighbor */ -// Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ -// if |N| ≥ minPts then { /* Density check (if Q is a core point) */ -// S := S ∪ N /* Add new neighbors to seed set */ -// } -// } -// } -// } - -// Variations ... -// 1. Use a quadtree to find neighbors -// 2. Sort the pointd by decreasing intensity (more intense points adopt first). -// 3. Use an intensity threshold intead of a minimum number of neighbors. 
- -#[derive(Debug, PartialEq, Clone)] -pub enum ClusterLabel { - Unassigned, - Noise, - Cluster(T), -} +/// DBSCAN(DB, distFunc, eps, minPts) { +/// C := 0 /* Cluster counter */ +/// for each point P in database DB { +/// if label(P) ≠ undefined then continue /* Previously processed in inner loop */ +/// Neighbors N := RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ +/// if |N| < minPts then { /* Density check */ +/// label(P) := Noise /* Label as Noise */ +/// continue +/// } +/// C := C + 1 /* next cluster label */ +/// label(P) := C /* Label initial point */ +/// SeedSet S := N \ {P} /* Neighbors to expand */ +/// for each point Q in S { /* Process every seed point Q */ +/// if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ +/// if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ +/// label(Q) := C /* Label neighbor */ +/// Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ +/// if |N| ≥ minPts then { /* Density check (if Q is a core point) */ +/// S := S ∪ N /* Add new neighbors to seed set */ +/// } +/// } +/// } +/// } +/// Variations ... +/// 1. Indexing is am implementation detail to find the neighbors (generic indexer) +/// 2. Sort the pointd by decreasing intensity (more intense points adopt first). +/// 3. Use an intensity threshold intead of a minimum number of neighbors. +/// 4. There are ways to define the limits to the extension of a cluster. impl HasIntensity for frames::TimsPeak { fn intensity(&self) -> u32 { @@ -169,7 +162,7 @@ fn _dbscan< filter_fun: Option, converter: C, progress: bool, - max_extension_distances: &[f32;N], + max_extension_distances: &[f32; N], ) -> (u64, Vec>) { let mut initial_candidates_counts = utils::RollingSDCalculator::default(); let mut final_candidates_counts = utils::RollingSDCalculator::default(); @@ -306,7 +299,12 @@ fn _dbscan< let query_point = query_elems.1.unwrap(); // Using minkowski distance with p = 1, manhattan distance. 
let mut within_distance = true; - for ((p, q), max_dist) in p.values.iter().zip(query_point.values).zip(max_extension_distances.iter()) { + for ((p, q), max_dist) in p + .values + .iter() + .zip(query_point.values) + .zip(max_extension_distances.iter()) + { let dist = (p - q).abs(); within_distance = within_distance && dist <= *max_dist; if !within_distance { @@ -355,158 +353,6 @@ fn _dbscan< (cluster_id, cluster_labels) } - -fn _inner, R>( - chunk: &[(usize, T)], - max_cluster_id: usize, - def_aggregator: &dyn Fn() -> G, -) -> Vec> { - let mut cluster_vecs: Vec> = (0..max_cluster_id).map(|_| None).collect(); - - for (cluster_idx, point) in chunk { - if cluster_vecs[*cluster_idx].is_none() { - cluster_vecs[*cluster_idx] = Some(def_aggregator()); - } - cluster_vecs[*cluster_idx].as_mut().unwrap().add(point); - } - - cluster_vecs -} - -pub fn aggregate_clusters< - T: HasIntensity + Send + Clone + Copy, - G: Sync + Send + ClusterAggregator, - R: Send, - F: Fn() -> G + Send + Sync, - Z: AsPrimitive - + Send - + Sync - + Add - + PartialOrd - + Div - + Mul - + Default - + Sub, ->( - tot_clusters: u64, - cluster_labels: Vec>, - elements: &[T], - def_aggregator: &F, - log_level: utils::LogLevel, - keep_unclustered: bool, -) -> Vec { - let cluster_vecs: Vec = if cfg!(feature = "par_dataprep") { - let mut timer = - utils::ContextTimer::new("dbscan_generic::par_aggregation", true, log_level); - let out: Vec<(usize, T)> = cluster_labels - .iter() - .enumerate() - .filter_map(|(point_index, x)| match x { - ClusterLabel::Cluster(cluster_id) => { - let cluster_idx = *cluster_id as usize - 1; - let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); - tmp - } - _ => None, - }) - .collect(); - - let run_closure = - |chunk: Vec<(usize, T)>| _inner(&chunk, tot_clusters as usize, &def_aggregator); - let chunk_size = (out.len() / rayon::current_num_threads()) / 2; - let chunk_size = chunk_size.max(1); - let out2 = out - .into_par_iter() - .chunks(chunk_size) - 
.map(run_closure) - .reduce(Vec::new, |l, r| { - if l.is_empty() { - r - } else { - l.into_iter() - .zip(r) - .map(|(l, r)| match (l, r) { - (Some(l), Some(r)) => { - let o = l.combine(r); - Some(o) - } - (Some(l), None) => Some(l), - (None, Some(r)) => Some(r), - (None, None) => None, - }) - .collect::>() - } - }); - - let mut cluster_vecs = out2.into_iter().flatten().collect::>(); - - let unclustered_elems: Vec = cluster_labels - .iter() - .enumerate() - .filter(|(_, x)| match x { - ClusterLabel::Unassigned => true, - ClusterLabel::Noise => keep_unclustered, - _ => false, - }) - .map(|(i, _elem)| i) - .collect(); - - // if unclustered_elems.len() > 0 { - // log::debug!("Total Orig elems: {}", cluster_labels.len()); - // log::debug!("Unclustered elems: {}", unclustered_elems.len()); - // log::debug!("Clustered elems: {}", cluster_vecs.len()); - // } - - let unclustered_elems = unclustered_elems - .iter() - .map(|i| { - let mut oe = def_aggregator(); - oe.add(&elements[*i]); - oe - }) - .collect::>(); - - cluster_vecs.extend(unclustered_elems); - - timer.stop(true); - cluster_vecs - } else { - let mut cluster_vecs: Vec = Vec::with_capacity(tot_clusters as usize); - let mut unclustered_points: Vec = Vec::new(); - for _ in 0..tot_clusters { - cluster_vecs.push(def_aggregator()); - } - for (point_index, cluster_label) in cluster_labels.iter().enumerate() { - match cluster_label { - ClusterLabel::Cluster(cluster_id) => { - let cluster_idx = *cluster_id as usize - 1; - cluster_vecs[cluster_idx].add(&(elements[point_index])); - } - ClusterLabel::Noise => { - if keep_unclustered { - let mut oe = def_aggregator(); - oe.add(&elements[point_index]); - unclustered_points.push(oe); - } - } - _ => {} - } - } - cluster_vecs.extend(unclustered_points); - cluster_vecs - }; - - let mut timer = - utils::ContextTimer::new("dbscan_generic::aggregation", true, utils::LogLevel::TRACE); - let out = cluster_vecs - .par_iter() - .map(|cluster| cluster.aggregate()) - .collect::>(); - 
timer.stop(true); - - out -} - // Pretty simple function ... it uses every passed centroid, converts it to a point // and generates a new centroid that aggregates all the points in its range. // In contrast with the dbscan method, the elements in each cluster are not necessarily @@ -536,7 +382,7 @@ fn reassign_centroid< elements: &Vec, def_aggregator: F, log_level: utils::LogLevel, - expansion_factors: &[f32;N], + expansion_factors: &[f32; N], ) -> Vec { let mut timer = utils::ContextTimer::new("reassign_centroid", true, log_level); let mut out = Vec::with_capacity(centroids.len()); @@ -594,7 +440,7 @@ pub fn dbscan_generic< extra_filter_fun: Option<&FF>, log_level: Option, keep_unclustered: bool, - max_extension_distances: &[f32;N], + max_extension_distances: &[f32; N], back_converter: Option, ) -> Vec { let show_progress = log_level.is_some(); @@ -623,7 +469,6 @@ pub fn dbscan_generic< .enumerate() .map(|(i, peak)| (i, peak.intensity())) .collect::>(); - // Q: Does ^^^^ need a clone? i and peak intensity ... 
- S intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); i_timer.stop(true); @@ -653,25 +498,19 @@ pub fn dbscan_generic< ); match back_converter { - Some(bc) => { - - reassign_centroid( - centroids, - &tree, - bc, - &prefiltered_peaks, - &def_aggregator, - log_level, - max_extension_distances, - ) - } - None => { - centroids - } + Some(bc) => reassign_centroid( + centroids, + &tree, + bc, + &prefiltered_peaks, + &def_aggregator, + log_level, + max_extension_distances, + ), + None => centroids, } } - type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; // bool> pub fn dbscan_denseframe( @@ -722,7 +561,7 @@ pub fn dbscan_denseframe( None::<&FFTimsPeak>, None, true, - &[max_mz_extension as f32, max_ims_extension as f32], + &[max_mz_extension as f32, max_ims_extension], None::, ); diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 739a360..edf1572 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -4,6 +4,7 @@ use crate::aggregation::dbscan; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; +use crate::ms::frames::FrameQuadWindow; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; use crate::utils; @@ -11,8 +12,8 @@ use crate::utils; use indicatif::ParallelProgressIterator; use log::{info, trace, warn}; use rayon::prelude::*; -use timsrust::Frame; use serde::{Deserialize, Serialize}; +use timsrust::Frame; // TODO I can probably split the ms1 and ms2 ... #[derive(Debug, Serialize, Deserialize, Clone, Copy)] @@ -112,10 +113,10 @@ fn _denoise_denseframe( max_ims_extension: f32, ) -> DenseFrame { // I am 99% sure the compiler will remove this section when optimizing ... but I still need to test it. 
- let frame_stats_start = FrameStats::new(&frame); + let frame_stats_start: FrameStats = FrameStats::new(&frame); let index = frame.index; + // this is the line that matters - // TODO move the scalings to parameters let denoised_frame = dbscan::dbscan_denseframe( frame, mz_scaling, @@ -146,42 +147,71 @@ fn _denoise_dia_frame( ims_scaling: f32, max_ims_extension: f32, ) -> Vec { + let window_group = dia_frame_info + .get_dia_frame_window_group(frame.index) + .unwrap(); let frame_windows = dia_frame_info - .split_frame(frame) + .split_frame(frame, window_group) .expect("Only DIA frames should be passed to this function"); frame_windows .into_iter() .map(|frame_window| { - let denseframe_window = DenseFrameWindow::from_frame_window( + denoise_frame_window( frame_window, ims_converter, mz_converter, dia_frame_info, - ); - let denoised_frame = _denoise_denseframe( - denseframe_window.frame, min_n, min_intensity, mz_scaling, max_mz_extension, ims_scaling, max_ims_extension, - ); - - DenseFrameWindow { - frame: denoised_frame, - ims_start: denseframe_window.ims_start, - ims_end: denseframe_window.ims_end, - mz_start: denseframe_window.mz_start, - mz_end: denseframe_window.mz_end, - group_id: denseframe_window.group_id, - quad_group_id: denseframe_window.quad_group_id, - } + ) }) .collect::>() } +fn denoise_frame_window( + frame_window: FrameQuadWindow, + ims_converter: &timsrust::Scan2ImConverter, + mz_converter: &timsrust::Tof2MzConverter, + dia_frame_info: &DIAFrameInfo, + min_n: usize, + min_intensity: u64, + mz_scaling: f64, + max_mz_extension: f64, + ims_scaling: f32, + max_ims_extension: f32, +) -> DenseFrameWindow { + let denseframe_window = DenseFrameWindow::from_frame_window( + frame_window, + ims_converter, + mz_converter, + dia_frame_info, + ); + let denoised_frame = _denoise_denseframe( + denseframe_window.frame, + min_n, + min_intensity, + mz_scaling, + max_mz_extension, + ims_scaling, + max_ims_extension, + ); + + DenseFrameWindow { + frame: denoised_frame, 
+ ims_start: denseframe_window.ims_start, + ims_end: denseframe_window.ims_end, + mz_start: denseframe_window.mz_start, + mz_end: denseframe_window.mz_end, + group_id: denseframe_window.group_id, + quad_group_id: denseframe_window.quad_group_id, + } +} + trait Denoiser<'a, T, W, X, Z> where T: std::marker::Send, @@ -190,14 +220,8 @@ where Z: Clone, Vec: IntoParallelIterator, { - fn denoise(&self, elem: T) -> W { - unimplemented!() - } - - fn par_denoise_slice( - &self, - elems: Vec, - ) -> Vec + fn denoise(&self, elem: T) -> W; + fn par_denoise_slice(&self, elems: Vec) -> Vec where Self: Sync, { @@ -254,25 +278,61 @@ struct DIAFrameDenoiser { mz_converter: timsrust::Tof2MzConverter, } +// impl DIAFrameDenoiser { +// fn denoise_framewindow_slice(self, elems: Vec) -> Vec {} +// } + impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> for DIAFrameDenoiser { - fn denoise(&self, frame: Frame) -> Vec { - _denoise_dia_frame( - frame, - self.min_n, - self.min_intensity, - &self.dia_frame_info, - &self.ims_converter, - &self.mz_converter, - self.mz_scaling, - self.max_mz_extension, - self.ims_scaling, - self.max_ims_extension, - ) + fn denoise(&self, _frame: Frame) -> Vec { + panic!("This should not be called") + // _denoise_dia_frame( + // frame, + // self.min_n, + // self.min_intensity, + // &self.dia_frame_info, + // &self.ims_converter, + // &self.mz_converter, + // self.mz_scaling, + // self.max_mz_extension, + // self.ims_scaling, + // self.max_ims_extension, + // ) } -} + fn par_denoise_slice(&self, elems: Vec) -> Vec> + where + Self: Sync, + { + info!("Denoising {} frames", elems.len()); + let frame_window_slices = self.dia_frame_info.split_frame_windows(elems); + let mut out = Vec::with_capacity(frame_window_slices.len()); + for sv in frame_window_slices { + let progbar = indicatif::ProgressBar::new(sv.len() as u64); + let denoised_elements: Vec = sv + .into_par_iter() + .progress_with(progbar) + .map(|x| { + denoise_frame_window( + x, + &self.ims_converter, + 
&self.mz_converter, + &self.dia_frame_info, + self.min_n, + self.min_intensity, + self.mz_scaling, + self.max_mz_extension, + self.ims_scaling, + self.max_ims_extension, + ) + }) + .collect::>(); + out.push(denoised_elements); + } + out + } +} // RN this is dead but will be resurrected soon ... pub fn read_all_ms1_denoising( diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 6084041..a76def0 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -717,9 +717,9 @@ pub fn combine_pseudospectra( quad_scaling: config.quad_scaling.into(), }; let max_extension_distances: [f32; 3] = [ - config.max_rt_expansion_ratio as f32, - config.max_ims_expansion_ratio as f32, - config.max_quad_expansion_ratio as f32, + config.max_rt_expansion_ratio, + config.max_ims_expansion_ratio, + config.max_quad_expansion_ratio, ]; let foo: Vec = dbscan_generic( @@ -765,12 +765,12 @@ pub fn write_pseudoscans_json( Ok(()) } -pub fn read_pseudoscans_json( - in_path: impl AsRef, -) -> Result, Box> { - info!("Reading pseudoscans from json {}", in_path.as_ref().display()); - let file = std::fs::File::open(in_path)?; - let reader = std::io::BufReader::new(file); - let out: Vec = serde_json::from_reader(reader)?; - Ok(out) -} +// pub fn read_pseudoscans_json( +// in_path: impl AsRef, +// ) -> Result, Box> { +// info!("Reading pseudoscans from json {}", in_path.as_ref().display()); +// let file = std::fs::File::open(in_path)?; +// let reader = std::io::BufReader::new(file); +// let out: Vec = serde_json::from_reader(reader)?; +// Ok(out) +// } diff --git a/src/extraction.rs b/src/extraction.rs deleted file mode 100644 index 9474d3e..0000000 --- a/src/extraction.rs +++ /dev/null @@ -1,13 +0,0 @@ -// This whole module is greatly inspired by how sage does quant. - -// Percent of the retention time window to use for extraction. -// 1.0 is 100% just to be clear. 
-// This defines the whole width, chich means that half will be -// used before and half after the expected apex. -const EXTRACTION_WINDOW_PCT: f64 = 0.02; - -// Number of bins in which the grid will be divided. -const NUM_BINS: usize = 20; - -const MS1_ISOTOPES: usize = 3; -const MS2_ISOTOPES: usize = 2; diff --git a/src/main.rs b/src/main.rs index 0fc6362..165b049 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,6 @@ // mod aggregation; -mod extraction; mod ms; mod scoring; mod space; @@ -22,7 +21,6 @@ use clap::Parser; use crate::scoring::SageSearchConfig; use serde::{Deserialize, Serialize}; -use std::env; use std::fs; use std::path::Path; diff --git a/src/ms/frames.rs b/src/ms/frames.rs index b2decda..a2ec67c 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -40,7 +40,6 @@ pub enum SortingOrder { /// Unprocessed data from a 'Frame' after breaking by quad isolation_window + ims window. /// -/// /// 1. every tof-index + intensity represents a peak. /// 2. Scan offsets are monotonically increasing. /// 3. Peaks are arranged in increasing m/z order WITHIN a scan. @@ -62,6 +61,7 @@ pub enum SortingOrder { /// - scan_start 123 // The scan number of the first scan offset in the current window. /// - group_id 1 // The group id of the current window. /// - quad_group_id 2 // The quad group id of the current window within the current group. +/// - quad_row_id 3 // The quad row id of the current window within all quad windows. #[derive(Debug, Clone)] pub struct FrameQuadWindow { pub scan_offsets: Vec, @@ -70,11 +70,13 @@ pub struct FrameQuadWindow { pub index: usize, pub rt: f64, pub frame_type: FrameType, + // From this point on they are local implementations // Before they are used from the timsrust crate. 
pub scan_start: usize, pub group_id: usize, pub quad_group_id: usize, + pub quad_row_id: usize, } #[derive(Debug, Clone)] diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index df272f8..2d5f5c5 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,13 +1,12 @@ -use log::{debug, error, info, trace}; +use log::{debug, info, trace}; use sqlx::Pool; -use std::path::{Path, PathBuf}; +use sqlx::{FromRow, Row, Sqlite, SqlitePool}; +use std::path::{Path}; use timsrust::{ConvertableIndex, Frame}; -use sqlx::{Row, Sqlite, SqlitePool,FromRow}; use tokio; use tokio::runtime::Runtime; - use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; // Diaframemsmsinfo = vec of frame_id -> windowgroup_id @@ -15,7 +14,7 @@ use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; #[derive(Debug, Clone)] pub struct ScanRange { - pub id: usize, + pub row_id: usize, pub scan_start: usize, pub scan_end: usize, pub iso_mz: f32, @@ -29,7 +28,7 @@ pub struct ScanRange { impl ScanRange { pub fn new( - id: usize, + row_id: usize, scan_start: usize, scan_end: usize, iso_mz: f32, @@ -48,7 +47,7 @@ impl ScanRange { let iso_high = iso_mz + iso_width / 2.0; Self { - id, + row_id, scan_start, scan_end, iso_mz, @@ -83,6 +82,8 @@ pub struct DIAFrameInfo { pub frame_groups: Vec>, pub retention_times: Vec>, pub grouping_level: GroupingLevel, + pub number_of_groups: usize, + pub row_to_group: Vec, } // TODO rename or split this ... since it is becoming more @@ -91,20 +92,19 @@ pub struct DIAFrameInfo { impl DIAFrameInfo { pub fn get_dia_frame_window_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { let group_id = self.frame_groups[frame_id]; - if group_id.is_none() { - return None; - } + group_id?; self.groups[group_id.unwrap()].as_ref() } - async fn rts_from_tdf_connection(conn: &Pool) -> Result>, sqlx::Error> { // To calculate cycle time -> // DiaFrameMsMsInfo -> Get the frames that match a specific id (one for each ...) 
// Frames -> SELECT id, time FROM Frames -> make a Vec>, map the former // framer id list (no value should be None). // Scan diff the new vec! - let results:Vec<(i32, f32)> = sqlx::query_as("SELECT Id, Time FROM Frames").fetch_all(conn).await?; + let results: Vec<(i32, f32)> = sqlx::query_as("SELECT Id, Time FROM Frames") + .fetch_all(conn) + .await?; let mut retention_times = Vec::new(); for row in results.iter() { let id: usize = row.0 as usize; @@ -156,15 +156,13 @@ impl DIAFrameInfo { avg_cycle_time } - pub fn split_frame(&self, frame: Frame) -> Result, &'static str> { - let group = self.get_group(frame.index); - if group.is_none() { - return Err("Frame not in DIA group"); - } - let group = group.unwrap(); + pub fn split_frame(&self, frame: Frame, window_group: &DIAWindowGroup) -> Result, &'static str> { + // let group = self + // .get_dia_frame_window_group(frame.index) + // .expect("Frame not in DIA group, non splittable frame passed to split_frame."); let mut out_frames = Vec::new(); - for (i, scan_range) in group.scan_ranges.iter().enumerate() { + for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { scan_range.scan_start; scan_range.scan_end; @@ -188,8 +186,9 @@ impl DIAFrameInfo { rt: frame.rt, frame_type: frame.frame_type, scan_start: scan_range.scan_start, - group_id: group.id, + group_id: window_group.id, quad_group_id: i, + quad_row_id: scan_range.row_id, }; out_frames.push(frame_window); @@ -198,13 +197,41 @@ impl DIAFrameInfo { Ok(out_frames) } + pub fn split_frame_windows(&self, frames: Vec) -> Vec> { + let mut out = Vec::new(); + for _ in 0..self.groups.len() { + out.push(Vec::new()); + } - pub fn split_frames() { + for frame in frames { + let group = self.get_dia_frame_window_group(frame.index).expect("Frame is not in MS2 frames"); + + match self.grouping_level { + GroupingLevel::WindowGroup => { + panic!("WindowGroup grouping level not implemented for splitting frames") + //out[group.id].push(frame_window); + } + 
GroupingLevel::QuadWindowGroup => { + let frame_windows = self.split_frame(frame, group).expect("Error splitting frame"); + for frame_window in frame_windows { + out[frame_window.quad_group_id].push(frame_window); + } + } + } + } + + // Sort by ascending rt + for group in out.iter_mut() { + group.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); + } + out } pub fn split_dense_frame(&self, mut denseframe: DenseFrame) -> Vec { - let group = self.get_dia_frame_window_group(denseframe.index).expect("Frame not in DIA group"); + let group = self + .get_dia_frame_window_group(denseframe.index) + .expect("Frame not in DIA group"); // Steps // 1. Sort by ims @@ -379,7 +406,6 @@ impl DIAFrameInfo { // FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) // ) WITHOUT ROWID - #[derive(Clone, FromRow, Debug)] pub struct DiaFrameMsMsWindowInfo { pub window_group: i32, @@ -415,28 +441,36 @@ impl FrameInfoBuilder { let scan_converter = reader.get_scan_converter().unwrap(); // Find an 'analysis.tdf' file inside the dotd file (directory). - let tdf_path = Path::new(dotd_path.as_str()).join("analysis.tdf").into_os_string().into_string().unwrap(); + let tdf_path = Path::new(dotd_path.as_str()) + .join("analysis.tdf") + .into_os_string() + .into_string() + .unwrap(); info!("tdf_path: {:?}", tdf_path); - Self { tdf_path, scan_converter } + Self { + tdf_path, + scan_converter, + } } pub fn build(&self) -> Result { - let mut rt = Runtime::new().unwrap(); + let rt = Runtime::new().unwrap(); - rt.block_on(async { - self.build_async().await - }) + rt.block_on(async { self.build_async().await }) } async fn build_async(&self) -> Result { - let db = SqlitePool::connect(&self.tdf_path).await?; + let db = SqlitePool::connect(&self.tdf_path).await?; // This vec maps frame_id -> window_group_id let frame_info = self.get_frame_mapping(&db).await?; // This vec maps window_group_id -> Vec // And also returns the grouping level. 
- let (group_mapping, grouping_level) = self.get_frame_windows(&db).await?; + let (group_mapping, grouping_level, row_to_group) = self.get_frame_windows(&db).await?; + let number_of_groups = row_to_group.iter().max().unwrap() + 1; + + debug!("Number of groups: {}", number_of_groups); let max_window_id = group_mapping.len() - 1; @@ -458,19 +492,26 @@ impl FrameInfoBuilder { frame_groups: frame_info, retention_times: DIAFrameInfo::rts_from_tdf_connection(&db).await?, grouping_level, + number_of_groups, + row_to_group, }; Ok(frame_info) - } - async fn get_frame_mapping(&self, db: &Pool) -> Result>, sqlx::Error>{ - let result: Vec<(i32, i32)> = sqlx::query_as( - "SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo;", - ) - .fetch_all(db).await?; - - let frame_info = result.iter().map(|(id, group)| (*id as usize, *group as usize)).collect::>(); + async fn get_frame_mapping( + &self, + db: &Pool, + ) -> Result>, sqlx::Error> { + let result: Vec<(i32, i32)> = + sqlx::query_as("SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo;") + .fetch_all(db) + .await?; + + let frame_info = result + .iter() + .map(|(id, group)| (*id as usize, *group as usize)) + .collect::>(); let max_id = frame_info.iter().map(|(id, _)| id).max().unwrap(); let mut ids_map_vec = vec![None; max_id + 1]; @@ -481,7 +522,10 @@ impl FrameInfoBuilder { Ok(ids_map_vec) } - async fn get_frame_windows(&self, db: &Pool) -> Result<(Vec>>, GroupingLevel), sqlx::Error> { + async fn get_frame_windows( + &self, + db: &Pool, + ) -> Result<(Vec>>, GroupingLevel, Vec), sqlx::Error> { let result: Vec = sqlx::query_as::<_, DiaFrameMsMsWindowInfo>( "SELECT WindowGroup, @@ -492,10 +536,14 @@ impl FrameInfoBuilder { CollisionEnergy FROM DiaFrameMsMsWindows", ) - .fetch_all(db).await.unwrap(); + .fetch_all(db) + .await + .unwrap(); let grouping_level = if result.len() > 200 { - log::info!("More than 200 scan ranges, using WindowGroup grouping level. 
(diagonal PASEF?)"); + log::info!( + "More than 200 scan ranges, using WindowGroup grouping level. (diagonal PASEF?)" + ); GroupingLevel::WindowGroup } else { log::info!("More than 200 scan ranges, using WindowGroup grouping level. (diaPASEF?)"); @@ -511,6 +559,7 @@ impl FrameInfoBuilder { let mut group_map_vec: Vec>> = vec![None; max_window_id + 1]; let mut scangroup_id = 0; + let mut row_to_group = Vec::new(); for window in result { // TODO this is maybe a good place to make the trouping ... // If its diapasef, the groups are quad+window groups. @@ -523,14 +572,23 @@ impl FrameInfoBuilder { match &mut group_map_vec[usize_wg] { None => continue, Some(scan_ranges) => { - scan_ranges.push(window.into_scan_range(scangroup_id.clone(), &self.scan_converter)); + scan_ranges + .push(window.into_scan_range(scangroup_id, &self.scan_converter)); scangroup_id += 1; } } + + match grouping_level { + GroupingLevel::WindowGroup => { + row_to_group.push(usize_wg); + } + GroupingLevel::QuadWindowGroup => { + row_to_group.push(scangroup_id); + } + } } - Ok((group_map_vec, grouping_level)) + Ok((group_map_vec, grouping_level, row_to_group)) } - } // TODO refactor this to make it a constructor method ... From 31e29c116faf41900ad1ce68d1fc7981d5ba4fec Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sat, 6 Jul 2024 00:41:36 -0700 Subject: [PATCH 04/26] further implementation of delayed denseframe conversion and early splitting and format --- .pre-commit-config.yaml | 8 + src/aggregation/chromatograms.rs | 33 +-- src/aggregation/converters.rs | 3 +- src/aggregation/dbscan.rs | 4 +- src/aggregation/mod.rs | 6 +- src/aggregation/ms_denoise.rs | 22 +- src/aggregation/tracing.rs | 34 ++- src/main.rs | 43 ++-- src/ms/frames.rs | 155 +++++++++---- src/ms/tdf.rs | 359 ++++++++++++++----------------- src/space/quad.rs | 2 +- src/space/space_generics.rs | 1 - 12 files changed, 365 insertions(+), 305 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fd16ba2..614e968 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,3 +8,11 @@ repos: - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files +- repo: local + hooks: + - id: rustfmt + name: rustfmt + description: Check if all files follow the rustfmt style + entry: cargo fmt --all -- --check --color always + language: system + pass_filenames: false diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index ecd21bd..8cc357f 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -1,4 +1,3 @@ - use log::warn; use num_traits::AsPrimitive; @@ -16,7 +15,10 @@ pub struct BTreeChromatogram { } #[derive(Debug, Clone, Copy)] -pub struct ChromatogramArray + AddAssign + Default + AsPrimitive, const NBINS: usize>{ +pub struct ChromatogramArray< + T: Mul + AddAssign + Default + AsPrimitive, + const NBINS: usize, +> { pub chromatogram: [T; NBINS], pub rt_binsize: f32, pub rt_bin_offset: Option, @@ -103,7 +105,7 @@ impl BTreeChromatogram { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { - return None + return None; } // This would be the offset needed to align the two chromatograms @@ -111,7 +113,8 @@ impl 
BTreeChromatogram { // be bin number `other_vs_self_offset` in other. // This line will also return None if either of the chromatograms // has no bin offset set. - let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; + let other_vs_self_offset = + ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; let (min, max) = self.int_range()?; let (min_o, max_o) = other.int_range()?; @@ -137,7 +140,10 @@ impl BTreeChromatogram { Some(cosine) } - pub fn as_chromatogram_array(&self, center_rt: Option) -> ChromatogramArray { + pub fn as_chromatogram_array( + &self, + center_rt: Option, + ) -> ChromatogramArray { let mut chromatogram_arr = [0.; NUM_LOCAL_CHROMATOGRAM_BINS]; let max_chr_arr_width = NUM_LOCAL_CHROMATOGRAM_BINS as f32 * self.rt_binsize; @@ -151,7 +157,8 @@ impl BTreeChromatogram { // The chromatogram uses the bin size of the chromatogram btree // but re-centers it to the mean RT of the trace if !self.btree.is_empty() { - let int_center = ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32; + let int_center = + ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32; let left_start = int_center - (NUM_LOCAL_CHROMATOGRAM_BINS / 2) as i32; for i in 0..NUM_LOCAL_CHROMATOGRAM_BINS { @@ -168,13 +175,14 @@ impl BTreeChromatogram { } } -impl + AddAssign + Default + AsPrimitive, const NBINS:usize> ChromatogramArray { - +impl + AddAssign + Default + AsPrimitive, const NBINS: usize> + ChromatogramArray +{ pub fn cosine_similarity(&self, other: &Self) -> Option { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { - return None + return None; } // This would be the offset needed to align the two chromatograms @@ -182,7 +190,8 @@ impl + AddAssign + Default + AsPrimitive, const NBINS:us // be bin number `other_vs_self_offset` in other. 
// This line will also return None if either of the chromatograms // has no bin offset set. - let other_vs_self_offset = ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; + let other_vs_self_offset = + ((other.rt_bin_offset? - self.rt_bin_offset?) / self.rt_binsize) as i32; let mut dot = T::default(); let mut mag_a = T::default(); @@ -247,7 +256,7 @@ mod chromatogram_tests { } #[test] - fn test_chromatogram_array_cosine(){ + fn test_chromatogram_array_cosine() { let mut c = ChromatogramArray:: { chromatogram: [0; 5], rt_binsize: 1., @@ -277,7 +286,6 @@ mod chromatogram_tests { c.chromatogram[4] = 20; let cosine = c.cosine_similarity(&c2).unwrap(); assert!(cosine <= 0.9, "Cosine: {}", cosine); - } #[test] @@ -305,7 +313,6 @@ mod chromatogram_tests { c.add(2., 3); c.add(5., 5); - let mut c2 = BTreeChromatogram::new(1., 1.55); // With bin offset of 1.55 and binsize 1.0, bin 0 is [1.55, 2.55) diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs index ebffcc4..28ed4ce 100644 --- a/src/aggregation/converters.rs +++ b/src/aggregation/converters.rs @@ -1,7 +1,6 @@ - use crate::ms::frames::TimsPeak; -use crate::space::space_generics::NDPointConverter; use crate::space::space_generics::NDPoint; +use crate::space::space_generics::NDPointConverter; // https://github.com/rust-lang/rust/issues/35121 // The never type is not stable yet.... 
diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index 82ce70b..c18f86c 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -249,8 +249,6 @@ fn _dbscan< let mut seed_set: Vec<&usize> = Vec::new(); seed_set.extend(neighbors); - let mut internal_neighbor_additions = 0; - while let Some(neighbor) = seed_set.pop() { let neighbor_index = *neighbor; if cluster_labels[neighbor_index] == ClusterLabel::Noise { @@ -316,7 +314,6 @@ fn _dbscan< }); local_neighbor_filter_timer.stop(false); - internal_neighbor_additions += local_neighbors.len(); seed_set.extend(local_neighbors); } } @@ -409,6 +406,7 @@ fn reassign_centroid< timer.stop(true); out } + // TODO: rename prefiltered peaks argument! // TODO implement a version that takes a sparse distance matrix. diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 04e8dc4..43ed723 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -1,6 +1,6 @@ +pub mod aggregators; +pub mod chromatograms; +pub mod converters; pub mod dbscan; pub mod ms_denoise; -pub mod converters; -pub mod aggregators; pub mod tracing; -pub mod chromatograms; diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index edf1572..0fad03e 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -4,7 +4,7 @@ use crate::aggregation::dbscan; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; -use crate::ms::frames::FrameQuadWindow; +use crate::ms::frames::FrameSlice; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; use crate::utils; @@ -151,14 +151,14 @@ fn _denoise_dia_frame( .get_dia_frame_window_group(frame.index) .unwrap(); let frame_windows = dia_frame_info - .split_frame(frame, window_group) + .split_frame(&frame, window_group) .expect("Only DIA frames should be passed to this function"); frame_windows .into_iter() .map(|frame_window| { - denoise_frame_window( - frame_window, + 
denoise_frame_slice( + &frame_window, ims_converter, mz_converter, dia_frame_info, @@ -173,8 +173,8 @@ fn _denoise_dia_frame( .collect::>() } -fn denoise_frame_window( - frame_window: FrameQuadWindow, +fn denoise_frame_slice( + frame_window: &FrameSlice, ims_converter: &timsrust::Scan2ImConverter, mz_converter: &timsrust::Tof2MzConverter, dia_frame_info: &DIAFrameInfo, @@ -203,8 +203,8 @@ fn denoise_frame_window( DenseFrameWindow { frame: denoised_frame, - ims_start: denseframe_window.ims_start, - ims_end: denseframe_window.ims_end, + ims_min: denseframe_window.ims_min, + ims_max: denseframe_window.ims_max, mz_start: denseframe_window.mz_start, mz_end: denseframe_window.mz_end, group_id: denseframe_window.group_id, @@ -306,7 +306,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> { info!("Denoising {} frames", elems.len()); - let frame_window_slices = self.dia_frame_info.split_frame_windows(elems); + let frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); let mut out = Vec::with_capacity(frame_window_slices.len()); for sv in frame_window_slices { let progbar = indicatif::ProgressBar::new(sv.len() as u64); @@ -314,8 +314,8 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> .into_par_iter() .progress_with(progbar) .map(|x| { - denoise_frame_window( - x, + denoise_frame_slice( + &x, &self.ims_converter, &self.mz_converter, &self.dia_frame_info, diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index a76def0..20e7b68 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,18 +1,20 @@ -use crate::aggregation::dbscan::dbscan_generic; use crate::aggregation::aggregators::ClusterAggregator; +use crate::aggregation::chromatograms::{ + BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, +}; +use crate::aggregation::dbscan::dbscan_generic; use crate::ms::frames::DenseFrameWindow; +use crate::space::space_generics::NDBoundary; use crate::space::space_generics::{HasIntensity, NDPoint, 
NDPointConverter, TraceLike}; use crate::utils; use crate::utils::RollingSDCalculator; -use crate::space::space_generics::NDBoundary; -use crate::aggregation::chromatograms::{BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS}; +use core::panic; use log::{debug, error, info, warn}; use rayon::iter::IntoParallelIterator; use rayon::prelude::*; +use serde::ser::{SerializeStruct, Serializer}; use serde::{Deserialize, Serialize}; -use serde::ser::{Serializer, SerializeStruct}; -use core::panic; use std::error::Error; use std::io::Write; use std::path::Path; @@ -93,10 +95,12 @@ impl Serialize for BaseTrace { state.serialize_field("chromatogram", &format!("{:?}", chromatogram))?; state.end() } - } -pub fn write_trace_csv(traces: &Vec, path: impl AsRef) -> Result<(), Box> { +pub fn write_trace_csv( + traces: &Vec, + path: impl AsRef, +) -> Result<(), Box> { let mut wtr = csv::Writer::from_path(path).unwrap(); for trace in traces { wtr.serialize(trace)?; @@ -272,7 +276,6 @@ pub fn combine_traces( out } - #[derive(Debug, Clone)] struct TraceAggregator { mz: RollingSDCalculator, @@ -313,7 +316,8 @@ impl ClusterAggregator for TraceAggregator { // The chromatogram is an array centered on the retention time let num_rt_points = self.btree_chromatogram.btree.len(); - let chromatogram: ChromatogramArray = self.btree_chromatogram.as_chromatogram_array(Some(rt)); + let chromatogram: ChromatogramArray = + self.btree_chromatogram.as_chromatogram_array(Some(rt)); // let apex = chromatogram.chromatogram.iter().enumerate().max_by_key(|x| (x.1 * 100.) 
as i32).unwrap().0; // let apex_offset = (apex as f32 - (NUM_LOCAL_CHROMATOGRAM_BINS as f32 / 2.)) * self.btree_chromatogram.rt_binsize; @@ -391,7 +395,6 @@ impl NDPointConverter for BypassBaseTraceBackConverter { } } - fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec { denseframe_windows .into_iter() @@ -415,7 +418,6 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec bool; - // TODO maybe this can be a builder-> executor pattern fn _combine_single_window_traces( prefiltered_peaks: Vec, @@ -603,10 +605,7 @@ impl NDPointConverter for BaseTraceConverter { fn convert_to_bounds_query<'a>( &self, point: &'a NDPoint<3>, - ) -> ( - NDBoundary<3>, - Option<&'a NDPoint<3>>, - ) { + ) -> (NDBoundary<3>, Option<&'a NDPoint<3>>) { const NUM_DIMENTIONS: usize = 3; // let range_center = (point.values[1] + point.values[2]) / 2.; let mut starts = point.values; @@ -647,9 +646,6 @@ impl NDPointConverter for PseudoScanBackConverter { } } - - - #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct PseudoscanGenerationConfig { pub rt_scaling: f32, @@ -677,7 +673,6 @@ impl Default for PseudoscanGenerationConfig { } } - pub fn combine_pseudospectra( traces: Vec, config: PseudoscanGenerationConfig, @@ -689,7 +684,6 @@ pub fn combine_pseudospectra( rt_scaling: config.rt_scaling.into(), ims_scaling: config.ims_scaling.into(), quad_scaling: config.quad_scaling.into(), - // rt_start_end_ratio: 2., // peak_width_prior: 0.75, }; diff --git a/src/main.rs b/src/main.rs index 165b049..43081d6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -108,24 +108,31 @@ fn main() { } // TODO: consier moving this to the config struct as an implementation. 
- let out_path_scans = config.output_config.debug_scans_json.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); - let out_traces_path = config.output_config.debug_traces_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); - let out_path_features = config.output_config.out_features_csv.as_ref().map(|path| out_path_dir.join(path).to_path_buf()); + let out_path_scans = config + .output_config + .debug_scans_json + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); + let out_traces_path = config + .output_config + .debug_traces_csv + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); + let out_path_features = config + .output_config + .out_features_csv + .as_ref() + .map(|path| out_path_dir.join(path).to_path_buf()); log::info!("Reading DIA data from: {}", path_use); - let (dia_frames, dia_info) = aggregation::ms_denoise::read_all_dia_denoising( - path_use.clone(), - config.denoise_config, - ); + let (dia_frames, dia_info) = + aggregation::ms_denoise::read_all_dia_denoising(path_use.clone(), config.denoise_config); let cycle_time = dia_info.calculate_cycle_time(); // TODO add here expansion limits - let mut traces = aggregation::tracing::combine_traces( - dia_frames, - config.tracing_config, - cycle_time, - ); + let mut traces = + aggregation::tracing::combine_traces(dia_frames, config.tracing_config, cycle_time); let out = match out_traces_path { Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), @@ -147,23 +154,19 @@ fn main() { // Maybe reparametrize as 1.1 cycle time // TODO add here expansion limits - let mut pseudoscans = aggregation::tracing::combine_pseudospectra( - traces, - config.pseudoscan_generation_config, - ); + let mut pseudoscans = + aggregation::tracing::combine_pseudospectra(traces, config.pseudoscan_generation_config); // Report min/max/average/std and skew for ims and rt // This can probably be a macro ... 
- let ims_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); + let ims_stats = utils::get_stats(&pseudoscans.iter().map(|x| x.ims as f64).collect::>()); let ims_sd_stats = utils::get_stats( &pseudoscans .iter() .map(|x| x.ims_std as f64) .collect::>(), ); - let rt_stats = - utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); + let rt_stats = utils::get_stats(&pseudoscans.iter().map(|x| x.rt as f64).collect::>()); let rt_sd_stats = utils::get_stats( &pseudoscans .iter() diff --git a/src/ms/frames.rs b/src/ms/frames.rs index a2ec67c..daf8cc8 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -1,4 +1,3 @@ - pub use timsrust::Frame; pub use timsrust::FrameType; pub use timsrust::{ @@ -38,6 +37,25 @@ pub enum SortingOrder { Intensity, } +/// Information on the context of a window in a frame. +/// +/// This adds to a frame slice the context of the what isolation was used +/// to generate the frame slice. +#[derive(Debug, Clone)] +pub struct FrameMsMsWindowInfo { + pub mz_start: f32, + pub mz_end: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, + pub global_quad_row_id: usize, +} + +#[derive(Debug, Clone)] +pub enum MsMsFrameSliceWindowInfo { + WindowGroup(usize), + SingleWindow(FrameMsMsWindowInfo), +} + /// Unprocessed data from a 'Frame' after breaking by quad isolation_window + ims window. /// /// 1. every tof-index + intensity represents a peak. @@ -52,31 +70,68 @@ pub enum SortingOrder { /// calibration) /// /// Frame Example values -/// - Scan offsets. [0,0,0,0,0,3,5,6 ...] n=number of scans -/// - tof indices. [100, 101, 102, 10, 20, 30 ...] len = len(intensities) -/// - intensities. [123, 111, 12 , 3, 4, 1 ...] len = len(tof indices) -/// - index 34 +/// - Scan offsets. `[0,0,0,0,0,3,5,6 ...]` n=number of scans +/// - tof indices. `[100, 101, 102, 10, 20, 30 ...]` len = len(intensities) +/// - intensities. 
`[123, 111, 12 , 3, 4, 1 ...]` len = len(tof indices) /// - rt 65.34 -/// Additions for FrameQuadWindow: +/// +/// Renamed from the frame: +/// - parent_frame_index 34 // renamed from Frame.index for clarity. +/// +/// Additions for FrameSlice: /// - scan_start 123 // The scan number of the first scan offset in the current window. -/// - group_id 1 // The group id of the current window. -/// - quad_group_id 2 // The quad group id of the current window within the current group. -/// - quad_row_id 3 // The quad row id of the current window within all quad windows. +/// - slice_window_info Some(MsMsFrameSliceWindowInfo::SingleWindow(FrameMsMsWindow)) #[derive(Debug, Clone)] -pub struct FrameQuadWindow { - pub scan_offsets: Vec, - pub tof_indices: Vec, - pub intensities: Vec, - pub index: usize, +pub struct FrameSlice<'a> { + pub scan_offsets: &'a [usize], + pub tof_indices: &'a [u32], + pub intensities: &'a [u32], + pub parent_frame_index: usize, pub rt: f64, pub frame_type: FrameType, // From this point on they are local implementations // Before they are used from the timsrust crate. 
pub scan_start: usize, - pub group_id: usize, - pub quad_group_id: usize, - pub quad_row_id: usize, + pub slice_window_info: Option, +} + +impl<'a> FrameSlice<'a> { + pub fn slice_frame( + frame: &'a Frame, + scan_start: usize, + scan_end: usize, + slice_window_info: Option, + ) -> FrameSlice<'a> { + let scan_offsets = &frame.scan_offsets[scan_start..=scan_end]; + let scan_start = scan_offsets[0]; + + let indprt_start = scan_offsets[0]; + let indptr_end = *scan_offsets.last().expect("Scan range is empty"); + + let tof_indices = &frame.tof_indices[indprt_start..indptr_end]; + let intensities = &frame.intensities[indprt_start..indptr_end]; + debug_assert!(tof_indices.len() == intensities.len()); + debug_assert!(indptr_end - indprt_start == tof_indices.len() as usize); + #[cfg(debug_assertions)] + { + for i in 1..(scan_offsets.len() - 1) { + debug_assert!(scan_offsets[i] <= scan_offsets[i + 1]); + debug_assert!((scan_offsets[i + 1] - scan_start) <= tof_indices.len() as usize); + } + } + + FrameSlice { + scan_offsets, + tof_indices, + intensities, + parent_frame_index: frame.index, + rt: frame.rt, + frame_type: frame.frame_type, + scan_start, + slice_window_info: slice_window_info, + } + } } #[derive(Debug, Clone)] @@ -91,8 +146,8 @@ pub struct DenseFrame { #[derive(Debug, Clone)] pub struct DenseFrameWindow { pub frame: DenseFrame, - pub ims_start: f32, - pub ims_end: f32, + pub ims_min: f32, + pub ims_max: f32, pub mz_start: f64, pub mz_end: f64, pub group_id: usize, @@ -101,36 +156,60 @@ pub struct DenseFrameWindow { impl DenseFrameWindow { pub fn from_frame_window( - frame_window: FrameQuadWindow, + frame_window: &FrameSlice, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, dia_info: &DIAFrameInfo, ) -> DenseFrameWindow { - let group_id = frame_window.group_id; - let quad_group_id = frame_window.quad_group_id; - let scan_start = frame_window.scan_start; + let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { + 
None => { + panic!("No window info") + // This branch points to an error in logic ... + // The window info should always be present in this context. + } + Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { + // This branch should be easy to implement for things like synchro pasef... + // Some details to iron out though ... + panic!("Not implemented") + } + Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { + let window_group_id = x.window_group_id; + let ww_quad_group_id = x.within_window_quad_group_id; + let scan_start = frame_window.scan_start; + (window_group_id, ww_quad_group_id, scan_start) + } + }; // NOTE: I am swapping here the 'scan start' to be the `ims_end` because // the first scans have lower 1/k0 values. - let ims_end = ims_converter.convert(scan_start as u32) as f32; - let ims_start = + let ims_max = ims_converter.convert(scan_start as u32) as f32; + let ims_min = ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; - let scan_range: &ScanRange = dia_info - .get_quad_windows(group_id, quad_group_id) - .expect("Quad group id should be valid"); - let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + debug_assert!(ims_max <= ims_min); + + let scan_range: Option<&ScanRange> = + dia_info.get_quad_windows(window_group_id, ww_quad_group_id); + let scan_range = match scan_range { + Some(x) => x, + None => { + panic!( + "No scan range for window_group_id: {}, within_window_quad_group_id: {}", + window_group_id, ww_quad_group_id + ); + } + }; - debug_assert!(ims_start <= ims_end); + let frame = DenseFrame::from_frame_window(&frame_window, ims_converter, mz_converter); DenseFrameWindow { frame, - ims_start, - ims_end, + ims_min, + ims_max, mz_start: scan_range.iso_low as f64, mz_end: scan_range.iso_high as f64, - group_id, - quad_group_id, + group_id: window_group_id, + quad_group_id: ww_quad_group_id, } } } @@ -183,7 +262,7 @@ impl DenseFrame { } pub fn from_frame_window( - frame_window: 
FrameQuadWindow, + frame_window: &FrameSlice, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, ) -> DenseFrame { @@ -193,7 +272,7 @@ impl DenseFrame { let num_tofs = index_offset - last_scan_offset; let scan_index_use = (scan_index + frame_window.scan_start) as u32; - let ims = ims_converter.convert(scan_index_use) as f32; + let ims = ims_converter.convert(scan_index as f64) as f32; if ims < 0.0 { info!("Negative IMS value: {}", ims); info!("scan_index_use: {}", scan_index_use); @@ -204,7 +283,7 @@ impl DenseFrame { expanded_scan_indices.extend(vec![ims; num_tofs as usize]); last_scan_offset = *index_offset; } - debug_assert!(last_scan_offset == frame_window.tof_indices.len() as u64); + debug_assert!(last_scan_offset == frame_window.tof_indices.len()); let peaks = expanded_scan_indices .iter() @@ -224,7 +303,7 @@ impl DenseFrame { } } - let index = frame_window.index; + let index = frame_window.parent_frame_index; let rt = frame_window.rt; let frame_type = frame_window.frame_type; diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 2d5f5c5..87d44b3 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,13 +1,13 @@ use log::{debug, info, trace}; use sqlx::Pool; -use sqlx::{FromRow, Row, Sqlite, SqlitePool}; -use std::path::{Path}; +use sqlx::{FromRow, Sqlite, SqlitePool}; +use std::path::Path; use timsrust::{ConvertableIndex, Frame}; use tokio; use tokio::runtime::Runtime; -use crate::ms::frames::{DenseFrame, DenseFrameWindow, FrameQuadWindow}; +use crate::ms::frames::{FrameMsMsWindowInfo, FrameSlice, MsMsFrameSliceWindowInfo}; // Diaframemsmsinfo = vec of frame_id -> windowgroup_id // diaframemsmswindows = vec[(windowgroup_id, scanstart, scanend, iso_mz, iso_with, nce)] @@ -24,11 +24,15 @@ pub struct ScanRange { pub ims_end: f32, pub iso_low: f32, pub iso_high: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, } impl ScanRange { pub fn new( row_id: usize, + window_group_id: usize, + within_window_quad_group_id: usize, 
scan_start: usize, scan_end: usize, iso_mz: f32, @@ -57,13 +61,27 @@ impl ScanRange { ims_end: ims_end as f32, iso_low, iso_high, + window_group_id, + within_window_quad_group_id, + } + } +} + +impl Into for ScanRange { + fn into(self) -> FrameMsMsWindowInfo { + FrameMsMsWindowInfo { + mz_start: self.iso_low, + mz_end: self.iso_high, + window_group_id: self.window_group_id.into(), + within_window_quad_group_id: self.within_window_quad_group_id.into(), + global_quad_row_id: self.row_id.into(), } } } #[derive(Debug, Clone)] pub struct DIAWindowGroup { - pub id: usize, + pub window_group_id: usize, pub scan_ranges: Vec, } @@ -92,8 +110,10 @@ pub struct DIAFrameInfo { impl DIAFrameInfo { pub fn get_dia_frame_window_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { let group_id = self.frame_groups[frame_id]; - group_id?; - self.groups[group_id.unwrap()].as_ref() + match group_id { + None => None, + Some(group_id) => self.groups[group_id].as_ref(), + } } async fn rts_from_tdf_connection(conn: &Pool) -> Result>, sqlx::Error> { @@ -156,55 +176,87 @@ impl DIAFrameInfo { avg_cycle_time } - pub fn split_frame(&self, frame: Frame, window_group: &DIAWindowGroup) -> Result, &'static str> { + pub fn split_frame<'a, 'b>( + &'b self, + frame: &'a Frame, + window_group: &DIAWindowGroup, + ) -> Result, &'static str> + where + 'a: 'b, + { // let group = self // .get_dia_frame_window_group(frame.index) // .expect("Frame not in DIA group, non splittable frame passed to split_frame."); let mut out_frames = Vec::new(); - for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { - scan_range.scan_start; - scan_range.scan_end; - - let scan_offsets_use = - &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)]; - let scan_start = scan_offsets_use[0]; - let mz_indptr_start = scan_offsets_use[0]; - let mz_indptr_end = *scan_offsets_use.last().unwrap(); - - let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); - let 
intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); - - let frame_window = FrameQuadWindow { - scan_offsets: scan_offsets_use - .iter() - .map(|x| (x - scan_start) as u64) - .collect::>(), - tof_indices: tof_indices_keep, - intensities: intensities_keep, - index: frame.index, - rt: frame.rt, - frame_type: frame.frame_type, - scan_start: scan_range.scan_start, - group_id: window_group.id, - quad_group_id: i, - quad_row_id: scan_range.row_id, - }; - - out_frames.push(frame_window); + for scan_range in window_group.scan_ranges.iter() { + let slice_w_info: MsMsFrameSliceWindowInfo = + MsMsFrameSliceWindowInfo::SingleWindow(scan_range.clone().into()); + let frame_slice = FrameSlice::slice_frame( + &frame, + scan_range.scan_start, + scan_range.scan_end, + Some(slice_w_info), + ); + out_frames.push(frame_slice); + + // TODO remove this old implementation + // for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { + + // scan_range.scan_start; + // scan_range.scan_end; + + // let scan_offsets_use = + // &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)]; + // let scan_start = scan_offsets_use[0]; + // let mz_indptr_start = scan_offsets_use[0]; + // let mz_indptr_end = *scan_offsets_use.last().unwrap(); + + // let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); + // let intensities_keep = frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); + + // let frame_window = FrameSlice { + // scan_offsets: scan_offsets_use + // .iter() + // .map(|x| (x - scan_start) as u64) + // .collect::>(), + // tof_indices: tof_indices_keep, + // intensities: intensities_keep, + // index: frame.index, + // rt: frame.rt, + // frame_type: frame.frame_type, + // scan_start: scan_range.scan_start, + // group_id: window_group.id, + // quad_group_id: i, + // quad_row_id: scan_range.row_id, + // }; + + // out_frames.push(frame_window); } Ok(out_frames) } - pub fn split_frame_windows(&self, frames: Vec) -> 
Vec> { + pub fn split_frame_windows<'a>(&'a self, frames: &'a [Frame]) -> Vec> { let mut out = Vec::new(); - for _ in 0..self.groups.len() { - out.push(Vec::new()); + + match self.grouping_level { + GroupingLevel::WindowGroup => { + for _ in 0..(self.groups.len() + 1) { + out.push(Vec::new()); + } + } + GroupingLevel::QuadWindowGroup => { + for _ in 0..(self.row_to_group.len() + 1) { + out.push(Vec::new()); + } + } } for frame in frames { - let group = self.get_dia_frame_window_group(frame.index).expect("Frame is not in MS2 frames"); + let group = self + .get_dia_frame_window_group(frame.index) + .expect("Frame is not in MS2 frames"); match self.grouping_level { GroupingLevel::WindowGroup => { @@ -212,9 +264,21 @@ impl DIAFrameInfo { //out[group.id].push(frame_window); } GroupingLevel::QuadWindowGroup => { - let frame_windows = self.split_frame(frame, group).expect("Error splitting frame"); + let frame_windows = self + .split_frame(&frame, group) + .expect("Error splitting frame"); for frame_window in frame_windows { - out[frame_window.quad_group_id].push(frame_window); + match &frame_window.slice_window_info { + None => { + panic!("Frame window has no slice window info") + } + Some(MsMsFrameSliceWindowInfo::SingleWindow(scan_range)) => { + out[scan_range.global_quad_row_id].push(frame_window); + } + Some(MsMsFrameSliceWindowInfo::WindowGroup(group)) => { + out[*group].push(frame_window); + } + } } } } @@ -225,150 +289,15 @@ impl DIAFrameInfo { group.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); } - out - } - - pub fn split_dense_frame(&self, mut denseframe: DenseFrame) -> Vec { - let group = self - .get_dia_frame_window_group(denseframe.index) - .expect("Frame not in DIA group"); - - // Steps - // 1. Sort by ims - // 2. Get ims bounds - // 3. 
Binary search for start and end - denseframe.sort_by_mobility(); - let mut frames = Vec::new(); - let imss = denseframe - .raw_peaks - .iter() - .map(|peak| peak.mobility) - .collect::>(); - for (i, scan_range) in group.scan_ranges.iter().enumerate() { - let start = imss.binary_search_by(|v| { - v.partial_cmp(&scan_range.ims_start) - .expect("Couldn't compare values") - }); - - let start = match start { - Ok(x) => x, - Err(x) => x, - }; - - let end = imss.binary_search_by(|v| { - v.partial_cmp(&scan_range.ims_end) - .expect("Couldn't compare values") - }); - - // i might need to add 1 here to make the range [closed, open) - let end = match end { - Ok(x) => x, - Err(x) => x, - }; - - let frame = DenseFrame { - raw_peaks: denseframe.raw_peaks[start..end].to_vec(), - index: denseframe.index, - rt: denseframe.rt, - frame_type: denseframe.frame_type, - sorted: denseframe.sorted, - }; - - let frame_window = DenseFrameWindow { - frame, - ims_start: scan_range.ims_start, - ims_end: scan_range.ims_end, - mz_start: scan_range.iso_low.into(), - mz_end: scan_range.iso_high.into(), - group_id: group.id, - quad_group_id: i, - }; - frames.push(frame_window); - } - - frames - } - - /// Returns a vector of length equal to the number of groups. - /// Each element is a vector of frames that belong to that group. - fn bundle_by_group(&self, frames: Vec) -> Vec> { - let mut frame_groups = Vec::new(); - for frame in frames { - let group = self.get_dia_frame_window_group(frame.index); - if group.is_none() { - continue; - } - let group = group.unwrap(); - let group_id = group.id; - if frame_groups.len() <= group_id { - frame_groups.resize(group_id + 1, Vec::new()); - } - frame_groups[group_id].push(frame); - } - frame_groups - } - - pub fn split_dense_frames(&self, frames: Vec) -> Vec>> { - info!("Splitting {} frames", frames.len()); - - // Returns a vector of length equal to the number of groups. 
- // Each element is a vector with length equal to the number of quad groups within - // that group. - // Each element of that vector is a vector of frames that belong to that quad group. - let max_num_quad_groups = self - .groups - .iter() - .map(|group| { - if group.is_none() { - 0 - } else { - group.as_ref().unwrap().scan_ranges.len() - } - }) - .max() - .unwrap(); - - let num_groups = self.groups.len(); - - let mut out = Vec::new(); - for _ in 0..num_groups { - let mut group_vec = Vec::new(); - for _ in 0..max_num_quad_groups { - group_vec.push(Vec::new()); - } - out.push(group_vec); - } - - let bundled_split_frames = self.bundle_by_group(frames); - for (i, frame_bundle) in bundled_split_frames.into_iter().enumerate() { - info!("Processing group {}", i); - for frame in frame_bundle { - let frame_windows = self.split_dense_frame(frame); - for frame_window in frame_windows { - out[i][frame_window.quad_group_id].push(frame_window); + // Debug assert that the frames are sorted by rt + if cfg!(debug_assertions) { + for group in out.iter() { + for i in 0..(group.len() - 1) { + debug_assert!(group[i].rt <= group[i + 1].rt); } } } - let counts = out - .iter() - .map(|group| { - group - .iter() - .map(|quad_group| quad_group.len()) - .collect::>() - }) - .collect::>(); - - trace!("Counts: {:?}", counts); - - for (i, group) in counts.iter().enumerate() { - trace!("Group {}", i); - for (j, quad_group) in group.iter().enumerate() { - trace!(" Quad group {}: {}", j, quad_group); - } - } - out } @@ -377,15 +306,33 @@ impl DIAFrameInfo { scan_group_id: usize, quad_group_id: usize, ) -> Option<&ScanRange> { - let group = self.groups[scan_group_id].as_ref()?; - let quad_group = group.scan_ranges.get(quad_group_id)?; + let group = self.groups[scan_group_id].as_ref(); + let group = match group { + None => { + panic!( + "Group not found for scan group id: {}, in groups n={}", + scan_group_id, + self.groups.len() + ) + } + Some(group) => group, + }; + + let quad_group = 
group.scan_ranges.get(quad_group_id); + let quad_group = match quad_group { + None => { + panic!( + "Quad group not found for quad group id: {}, in scan_ranges {:?}", + quad_group_id, group.scan_ranges + ) + } + Some(quad_group) => quad_group, + }; + Some(quad_group) } } -// TODO implement splitting frames into dia group+quad groups. -// [usize, math::round::floor(quad_mz_center)] - // Reference for the tables: // CREATE TABLE DiaFrameMsMsInfo ( @@ -408,18 +355,31 @@ impl DIAFrameInfo { #[derive(Clone, FromRow, Debug)] pub struct DiaFrameMsMsWindowInfo { + #[sqlx(rename = "WindowGroup")] pub window_group: i32, + #[sqlx(rename = "ScanNumBegin")] pub scan_num_begin: i32, + #[sqlx(rename = "ScanNumEnd")] pub scan_num_end: i32, + #[sqlx(rename = "IsolationMz")] pub isolation_mz: f32, + #[sqlx(rename = "IsolationWidth")] pub isolation_width: f32, + #[sqlx(rename = "CollisionEnergy")] pub collision_energy: f32, } impl DiaFrameMsMsWindowInfo { - fn into_scan_range(&self, id: usize, scan_converter: &timsrust::Scan2ImConverter) -> ScanRange { + fn into_scan_range( + &self, + id: usize, + quad_id: usize, + scan_converter: &timsrust::Scan2ImConverter, + ) -> ScanRange { ScanRange::new( id, + self.window_group as usize, + quad_id, self.scan_num_begin as usize, self.scan_num_end as usize, self.isolation_mz, @@ -480,10 +440,19 @@ impl FrameInfoBuilder { None => continue, Some(scan_ranges) => scan_ranges, }; + debug!("Scan ranges i={}: {:?}", i, scan_ranges); + if cfg!(debug_assertions) { + for scan_range in scan_ranges.iter() { + debug_assert!(scan_range.window_group_id == i) + } + }; if scan_ranges.is_empty() { continue; } else { - groups_vec_o[i] = Some(DIAWindowGroup { id: i, scan_ranges }); + groups_vec_o[i] = Some(DIAWindowGroup { + window_group_id: i, + scan_ranges, + }); } } @@ -546,7 +515,7 @@ impl FrameInfoBuilder { ); GroupingLevel::WindowGroup } else { - log::info!("More than 200 scan ranges, using WindowGroup grouping level. 
(diaPASEF?)"); + log::info!("Less than 200 scan ranges detected, using QuadWindowGroup grouping level. (diaPASEF?)"); GroupingLevel::QuadWindowGroup }; @@ -572,8 +541,12 @@ impl FrameInfoBuilder { match &mut group_map_vec[usize_wg] { None => continue, Some(scan_ranges) => { - scan_ranges - .push(window.into_scan_range(scangroup_id, &self.scan_converter)); + let quad_id = scan_ranges.len(); + scan_ranges.push(window.into_scan_range( + scangroup_id, + quad_id, + &self.scan_converter, + )); scangroup_id += 1; } } diff --git a/src/space/quad.rs b/src/space/quad.rs index 2d0b276..6a65c62 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -73,7 +73,7 @@ impl<'a, T> RadiusQuadTree<'a, T> { // This means any sub-division will be smaller than the radius let query_contained = radius_squared > distance_squared; - if (self.points.len() < self.capacity) || query_contained { + if (self.points.len() < self.capacity) || query_contained { self.points.push((point, data)); } else { self.subdivide(); diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index b3c9094..02e30f5 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,4 +1,3 @@ - #[derive(Debug, Clone, Copy)] pub struct NDBoundary { pub starts: [f32; DIMENSIONALITY], From b70c05dd2ed02fd5dddbde25fda0f7904c6dcd94 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sat, 6 Jul 2024 13:26:41 -0700 Subject: [PATCH 05/26] added unit testing to tdf reading --- src/ms/mod.rs | 1 - src/ms/ms.rs | 1 - src/ms/tdf.rs | 48 +- tests/data/.gitignore | 4 + tests/data/README.md | 11 + tests/data/build.bash | 113 +++ tests/data/diapasef_tdf/README.md | 5 + .../data/diapasef_tdf/dia_frame_msms_info.tsv | 27 + .../dia_frame_msms_window_groups.tsv | 9 + .../diapasef_tdf/dia_frame_msms_windows.tsv | 17 + tests/data/diapasef_tdf/frames.tsv | 31 + tests/data/diapasef_tdf/global_metadata.tsv | 6 + tests/data/synchropasef_tdf/README.md | 5 + .../synchropasef_tdf/dia_frame_msms_info.tsv | 31 + .../dia_frame_msms_window_groups.tsv | 5 + .../dia_frame_msms_windows.tsv | 804 ++++++++++++++++++ tests/data/synchropasef_tdf/frames.tsv | 31 + .../data/synchropasef_tdf/global_metadata.tsv | 6 + tests/test_window_parsing.rs | 66 ++ 19 files changed, 1181 insertions(+), 40 deletions(-) delete mode 100644 src/ms/ms.rs create mode 100644 tests/data/.gitignore create mode 100644 tests/data/README.md create mode 100644 tests/data/build.bash create mode 100644 tests/data/diapasef_tdf/README.md create mode 100644 tests/data/diapasef_tdf/dia_frame_msms_info.tsv create mode 100644 tests/data/diapasef_tdf/dia_frame_msms_window_groups.tsv create mode 100644 tests/data/diapasef_tdf/dia_frame_msms_windows.tsv create mode 100644 tests/data/diapasef_tdf/frames.tsv create mode 100644 tests/data/diapasef_tdf/global_metadata.tsv create mode 100644 tests/data/synchropasef_tdf/README.md create mode 100644 tests/data/synchropasef_tdf/dia_frame_msms_info.tsv create mode 100644 tests/data/synchropasef_tdf/dia_frame_msms_window_groups.tsv create mode 100644 tests/data/synchropasef_tdf/dia_frame_msms_windows.tsv create mode 100644 tests/data/synchropasef_tdf/frames.tsv create mode 100644 tests/data/synchropasef_tdf/global_metadata.tsv create mode 100644 tests/test_window_parsing.rs diff --git a/src/ms/mod.rs b/src/ms/mod.rs index f5bc367..1e9820a 100644 --- 
a/src/ms/mod.rs +++ b/src/ms/mod.rs @@ -1,4 +1,3 @@ pub mod frames; -pub mod ms; pub mod sorting; pub mod tdf; diff --git a/src/ms/ms.rs b/src/ms/ms.rs deleted file mode 100644 index 8b13789..0000000 --- a/src/ms/ms.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 87d44b3..af53611 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -101,6 +101,14 @@ pub struct DIAFrameInfo { pub retention_times: Vec>, pub grouping_level: GroupingLevel, pub number_of_groups: usize, + + /// The row to group is meant to map the `Isolation window row id` + /// to the grouping level it will have... for diaPASEF, since every + /// scan range has a different quad window, the number of distinct + /// groups is the number of scan ranges (window groups+scan range + /// combinations). For the case of diagonal PASEF, the number of + /// groups is the number of window groups, since the scan ranges + /// are not independent from each other. pub row_to_group: Vec, } @@ -184,10 +192,6 @@ impl DIAFrameInfo { where 'a: 'b, { - // let group = self - // .get_dia_frame_window_group(frame.index) - // .expect("Frame not in DIA group, non splittable frame passed to split_frame."); - let mut out_frames = Vec::new(); for scan_range in window_group.scan_ranges.iter() { let slice_w_info: MsMsFrameSliceWindowInfo = @@ -199,39 +203,6 @@ impl DIAFrameInfo { Some(slice_w_info), ); out_frames.push(frame_slice); - - // TODO remove this old implementation - // for (i, scan_range) in window_group.scan_ranges.iter().enumerate() { - - // scan_range.scan_start; - // scan_range.scan_end; - - // let scan_offsets_use = - // &frame.scan_offsets[scan_range.scan_start..(scan_range.scan_end - 1)]; - // let scan_start = scan_offsets_use[0]; - // let mz_indptr_start = scan_offsets_use[0]; - // let mz_indptr_end = *scan_offsets_use.last().unwrap(); - - // let tof_indices_keep = frame.tof_indices[mz_indptr_start..mz_indptr_end].to_vec(); - // let intensities_keep =
frame.intensities[mz_indptr_start..mz_indptr_end].to_vec(); - - // let frame_window = FrameSlice { - // scan_offsets: scan_offsets_use - // .iter() - // .map(|x| (x - scan_start) as u64) - // .collect::>(), - // tof_indices: tof_indices_keep, - // intensities: intensities_keep, - // index: frame.index, - // rt: frame.rt, - // frame_type: frame.frame_type, - // scan_start: scan_range.scan_start, - // group_id: window_group.id, - // quad_group_id: i, - // quad_row_id: scan_range.row_id, - // }; - - // out_frames.push(frame_window); } Ok(out_frames) @@ -390,7 +361,8 @@ impl DiaFrameMsMsWindowInfo { } } -struct FrameInfoBuilder { +#[derive(Debug)] +pub struct FrameInfoBuilder { pub tdf_path: String, pub scan_converter: timsrust::Scan2ImConverter, } diff --git a/tests/data/.gitignore b/tests/data/.gitignore new file mode 100644 index 0000000..3c47299 --- /dev/null +++ b/tests/data/.gitignore @@ -0,0 +1,4 @@ + +**.tdf +**.d +**.tdf_bin diff --git a/tests/data/README.md b/tests/data/README.md new file mode 100644 index 0000000..9905bdf --- /dev/null +++ b/tests/data/README.md @@ -0,0 +1,11 @@ + +# Build testing data + +The main purpose of this is to have a reproducible way to generate +data for testing. This should keep the repo size small and allow for +ease of extension of the tests as we might see fit. + +One of the things I am attempting to do is to keep all the data represented +in plain text and then built into the binary formats that actually get +used. The main rationale for this is transparency, improving the utility of +the source control and mild paranoia after the XZ exploit.
diff --git a/tests/data/build.bash b/tests/data/build.bash new file mode 100644 index 0000000..a5f98e9 --- /dev/null +++ b/tests/data/build.bash @@ -0,0 +1,113 @@ +#!/bin/bash + +# set -x # Display expansions +set -e +set -u +set -o pipefail + +for x in *_tdf; do + echo "Processing $x" + dotd_name=$x/data.d + tdf_name=$dotd_name/analysis.tdf + tdf_bin_name=$dotd_name/analysis.tdf_bin + + # Check if the .d directory exists + if [ -d $x/data.d ]; then + echo "Directory $x/data.d exists" + rm -rf $x/data.d + fi + + echo "Creating $x/data.d does not exist" + mkdir $x/data.d + + tdf_create=" \ + CREATE TABLE DiaFrameMsMsInfo ( \ + Frame INTEGER PRIMARY KEY, \ + WindowGroup INTEGER NOT NULL, \ + FOREIGN KEY (Frame) REFERENCES Frames (Id), \ + FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) \ + ); \ + CREATE TABLE DiaFrameMsMsWindowGroups ( \ + Id INTEGER PRIMARY KEY \ + ); \ + CREATE TABLE DiaFrameMsMsWindows ( \ + WindowGroup INTEGER NOT NULL, \ + ScanNumBegin INTEGER NOT NULL, \ + ScanNumEnd INTEGER NOT NULL, \ + IsolationMz REAL NOT NULL, \ + IsolationWidth REAL NOT NULL, \ + CollisionEnergy REAL NOT NULL, \ + PRIMARY KEY(WindowGroup, ScanNumBegin), \ + FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) \ + ) WITHOUT ROWID; \ + CREATE TABLE TimsCalibration ( \ + Id INTEGER PRIMARY KEY, \ + ModelType INTEGER NOT NULL, \ + C0 \ + , C1, C2, C3, C4, C5, C6, C7, C8, C9); \ + CREATE TABLE MzCalibration ( \ + Id INTEGER PRIMARY KEY, \ + ModelType INTEGER NOT NULL, \ + DigitizerTimebase REAL NOT NULL, \ + DigitizerDelay REAL NOT NULL, \ + T1 REAL NOT NULL, \ + T2 REAL NOT NULL, \ + dC1 REAL NOT NULL, \ + dC2 REAL NOT NULL, \ + C0 \ + , C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11, C12, C13, C14); \ + CREATE TABLE Frames ( \ + Id INTEGER PRIMARY KEY, \ + Time REAL NOT NULL, \ + Polarity CHAR(1) CHECK (Polarity IN ('+', '-')) NOT NULL, \ + ScanMode INTEGER NOT NULL, \ + MsMsType INTEGER NOT NULL, \ + TimsId INTEGER, \ + MaxIntensity INTEGER 
NOT NULL, \ + SummedIntensities INTEGER NOT NULL, \ + NumScans INTEGER NOT NULL, \ + NumPeaks INTEGER NOT NULL, \ + MzCalibration INTEGER NOT NULL, \ + T1 REAL NOT NULL, \ + T2 REAL NOT NULL, \ + TimsCalibration INTEGER NOT NULL, \ + PropertyGroup INTEGER, \ + AccumulationTime REAL NOT NULL, \ + RampTime REAL NOT NULL, \ + Pressure REAL, \ + FOREIGN KEY (MzCalibration) REFERENCES MzCalibration (Id), \ + FOREIGN KEY (TimsCalibration) REFERENCES TimsCalibration (Id), \ + FOREIGN KEY (PropertyGroup) REFERENCES PropertyGroups (Id) \ + ); \ + CREATE TABLE GlobalMetadata ( \ + Key TEXT PRIMARY KEY, \ + Value TEXT \ + ); \ + " + + echo "Creating tables" + sqlite3 ${tdf_name} "${tdf_create}" + # Show schema + sqlite3 ${tdf_name} ".schema" + + echo "DiaFrameMsMsWindowGroups >>>" + sqlite3 -cmd ".mode csv" -separator $'\t' ${tdf_name} ".import --skip 1 ${x}/dia_frame_msms_window_groups.tsv DiaFrameMsMsWindowGroups" + sqlite3 ${tdf_name} "SELECT * FROM DiaFrameMsMsWindowGroups LIMIT 5" + echo "DiaFrameMsMsWindows >>>" + sqlite3 -cmd ".mode csv" -separator $'\t' ${tdf_name} ".import --skip 1 ${x}/dia_frame_msms_windows.tsv DiaFrameMsMsWindows" + sqlite3 ${tdf_name} "SELECT * FROM DiaFrameMsMsWindows LIMIT 5" + echo "DiaFrameMsMsInfo >>>" + sqlite3 -cmd ".mode csv" -separator $'\t' ${tdf_name} ".import --skip 1 ${x}/dia_frame_msms_info.tsv DiaFrameMsMsInfo" + sqlite3 ${tdf_name} "SELECT * FROM DiaFrameMsMsInfo LIMIT 5" + echo "Frames >>>" + sqlite3 -cmd ".mode csv" -separator $'\t' ${tdf_name} ".import --skip 1 ${x}/frames.tsv Frames" + sqlite3 ${tdf_name} "SELECT * FROM Frames LIMIT 5" + echo "Global Metadata >>>" + sqlite3 -cmd ".mode csv" -separator $'\t' ${tdf_name} ".import --skip 1 ${x}/global_metadata.tsv GlobalMetadata" + sqlite3 ${tdf_name} "SELECT * FROM GlobalMetadata LIMIT 5" + + echo "Creating tdf_bin" + touch ${tdf_bin_name} + # sqlite3 -separator ',' ${tdf_name} ".import ${x}/dia_frame_msms_window_groups.tsv DiaFrameMsMsWindowGroups" + +done diff --git 
a/tests/data/diapasef_tdf/README.md b/tests/data/diapasef_tdf/README.md new file mode 100644 index 0000000..a4822c3 --- /dev/null +++ b/tests/data/diapasef_tdf/README.md @@ -0,0 +1,5 @@ + +# diaPASEF test data + +This is a modified method that removes the intermediate window +from each window group (thus should not be used for acquisition of real data). diff --git a/tests/data/diapasef_tdf/dia_frame_msms_info.tsv b/tests/data/diapasef_tdf/dia_frame_msms_info.tsv new file mode 100644 index 0000000..7087b75 --- /dev/null +++ b/tests/data/diapasef_tdf/dia_frame_msms_info.tsv @@ -0,0 +1,27 @@ +Frame WindowGroup +2 1 +3 2 +4 3 +5 4 +6 5 +7 6 +8 7 +9 8 +11 1 +12 2 +13 3 +14 4 +15 5 +16 6 +17 7 +18 8 +20 1 +21 2 +22 3 +23 4 +24 5 +25 6 +26 7 +27 8 +29 1 +30 2 diff --git a/tests/data/diapasef_tdf/dia_frame_msms_window_groups.tsv b/tests/data/diapasef_tdf/dia_frame_msms_window_groups.tsv new file mode 100644 index 0000000..41a5d76 --- /dev/null +++ b/tests/data/diapasef_tdf/dia_frame_msms_window_groups.tsv @@ -0,0 +1,9 @@ +Id +1 +2 +3 +4 +5 +6 +7 +8 diff --git a/tests/data/diapasef_tdf/dia_frame_msms_windows.tsv b/tests/data/diapasef_tdf/dia_frame_msms_windows.tsv new file mode 100644 index 0000000..d83be41 --- /dev/null +++ b/tests/data/diapasef_tdf/dia_frame_msms_windows.tsv @@ -0,0 +1,17 @@ +WindowGroup ScanNumBegin ScanNumEnd IsolationMz IsolationWidth CollisionEnergy +1 100 397 812.5 25.0 42.9829890643986 +1 545 701 412.5 25.0 25.2126366950182 +2 100 372 837.5 25.0 43.5516403402187 +2 529 701 437.5 25.0 25.591737545565 +3 100 356 862.5 25.0 43.9307411907655 +3 512 701 462.5 25.0 26.0182260024301 +4 100 331 887.5 25.0 44.546780072904 +4 487 701 487.5 25.0 26.5868772782503 +5 100 315 912.5 25.0 44.9258809234508 +5 471 701 512.5 25.0 26.9659781287971 +6 100 298 937.5 25.0 45.3049817739976 +6 455 701 537.5 25.0 27.3450789793439 +7 100 273 962.5 25.0 45.9210206561361 +7 430 701 562.5 25.0 27.9611178614824 +8 100 257 987.5 25.0 46.3001215066829 +8 413 701 587.5 25.0 
28.3402187120292 diff --git a/tests/data/diapasef_tdf/frames.tsv b/tests/data/diapasef_tdf/frames.tsv new file mode 100644 index 0000000..74ea921 --- /dev/null +++ b/tests/data/diapasef_tdf/frames.tsv @@ -0,0 +1,31 @@ +Id Time Polarity ScanMode MsMsType TimsId MaxIntensity SummedIntensities NumScans NumPeaks MzCalibration T1 T2 TimsCalibration PropertyGroup AccumulationTime RampTime Pressure Denoised +1 0.629856 + 9 0 64 5703 20584600 702 207045 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0 +2 0.711303 + 9 9 529662 353 765269 702 8629 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0 +3 0.793147 + 9 9 558593 333 810787 702 9216 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0 +4 0.874307 + 9 9 589670 352 806706 702 9166 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0 +5 0.955803 + 9 9 620635 361 870237 702 9841 1 25.6283947324288 25.2205013174081 1 1 75.007 75.007 2.38367083527434 0 +6 1.037344 + 9 9 653825 339 762751 702 8683 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +7 1.118971 + 9 9 683862 307 215401 702 2643 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +8 1.200507 + 9 9 694381 285 219393 702 2698 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +9 1.282113 + 9 9 705082 323 217661 702 2699 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +10 1.363679 + 9 0 715791 4700 14455769 702 137731 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +11 1.445201 + 9 9 1085008 335 182136 702 2171 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +12 1.526763 + 9 9 1093720 265 197002 702 2385 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +13 1.608378 + 9 9 1103213 335 194271 702 2390 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +14 1.689932 + 9 9 1112731 275 213692 702 2651 1 
25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +15 1.771684 + 9 9 1123167 291 194091 702 2387 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +16 1.853082 + 9 9 1132719 279 227903 702 2734 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +17 1.934854 + 9 9 1143499 339 218924 702 2678 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +18 2.01618 + 9 9 1154098 311 219963 702 2726 1 25.6284306232993 25.2215316591914 1 1 75.007 75.007 2.38367411214861 0 +19 2.098584 + 9 0 1164901 4334 14552572 702 138252 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +20 2.179965 + 9 9 1535400 313 180439 702 2208 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +21 2.261624 + 9 9 1544189 319 203342 702 2463 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +22 2.343267 + 9 9 1553941 281 192025 702 2387 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +23 2.424795 + 9 9 1563460 308 244388 702 2911 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +24 2.506324 + 9 9 1574834 260 211498 702 2571 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +25 2.587853 + 9 9 1585037 311 225892 702 2838 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +26 2.669553 + 9 9 1596209 273 202152 702 2507 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +27 2.751017 + 9 9 1606208 285 213091 702 2624 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +28 2.832589 + 9 0 1616628 4685 14300741 702 135361 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +29 2.914227 + 9 9 1979387 332 179155 702 2233 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 +30 2.995541 + 9 9 1988354 284 182142 702 2284 1 25.628466156872 25.2221137185281 1 1 75.007 75.007 2.38367735640121 0 diff --git 
a/tests/data/diapasef_tdf/global_metadata.tsv b/tests/data/diapasef_tdf/global_metadata.tsv new file mode 100644 index 0000000..9581c9b --- /dev/null +++ b/tests/data/diapasef_tdf/global_metadata.tsv @@ -0,0 +1,6 @@ +Key Value +OneOverK0AcqRangeLower 0.600000 +OneOverK0AcqRangeUpper 1.600000 +MzAcqRangeLower 50.000000 +MzAcqRangeUpper 1700.000000 +DigitizerNumSamples 434064 diff --git a/tests/data/synchropasef_tdf/README.md b/tests/data/synchropasef_tdf/README.md new file mode 100644 index 0000000..0afab32 --- /dev/null +++ b/tests/data/synchropasef_tdf/README.md @@ -0,0 +1,5 @@ + +# Synchro Pasef + +This is a section of the data from the synchro pasef manuscript. +SPECIFICALLY, I am keeping only scan numbers 1-200 (1-918 in the original data). diff --git a/tests/data/synchropasef_tdf/dia_frame_msms_info.tsv b/tests/data/synchropasef_tdf/dia_frame_msms_info.tsv new file mode 100644 index 0000000..0bed4c7 --- /dev/null +++ b/tests/data/synchropasef_tdf/dia_frame_msms_info.tsv @@ -0,0 +1,31 @@ +Frame WindowGroup +2 1 +3 2 +4 3 +5 4 +7 1 +8 2 +9 3 +10 4 +12 1 +13 2 +14 3 +15 4 +17 1 +18 2 +19 3 +20 4 +22 1 +23 2 +24 3 +25 4 +27 1 +28 2 +29 3 +30 4 +32 1 +33 2 +34 3 +35 4 +37 1 +38 2 diff --git a/tests/data/synchropasef_tdf/dia_frame_msms_window_groups.tsv b/tests/data/synchropasef_tdf/dia_frame_msms_window_groups.tsv new file mode 100644 index 0000000..2fa7349 --- /dev/null +++ b/tests/data/synchropasef_tdf/dia_frame_msms_window_groups.tsv @@ -0,0 +1,5 @@ +Id +1 +2 +3 +4 diff --git a/tests/data/synchropasef_tdf/dia_frame_msms_windows.tsv b/tests/data/synchropasef_tdf/dia_frame_msms_windows.tsv new file mode 100644 index 0000000..a6ca310 --- /dev/null +++ b/tests/data/synchropasef_tdf/dia_frame_msms_windows.tsv @@ -0,0 +1,804 @@ +WindowGroup ScanNumBegin ScanNumEnd IsolationMz IsolationWidth CollisionEnergy +1 0 1 1127.558 25.0 10.0 +1 1 2 1126.6501568627 25.0 10.0 +1 2 3 1125.7423137254 25.0 10.0 +1 3 4 1124.8344705881 25.0 10.0 +1 4 5 1123.9266274508 25.0 10.0 +1 5 6
1123.0187843135 25.0 10.0 +1 6 7 1122.1109411762 25.0 10.0 +1 7 8 1121.2030980389 25.0 10.0 +1 8 9 1120.2952549016 25.0 10.0 +1 9 10 1119.3874117643 25.0 10.0 +1 10 11 1118.479568627 25.0 10.0 +1 11 12 1117.5717254897 25.0 10.0 +1 12 13 1116.6638823524 25.0 10.0 +1 13 14 1115.7560392151 25.0 10.0 +1 14 15 1114.8481960778 25.0 10.0 +1 15 16 1113.9403529405 25.0 10.0 +1 16 17 1113.0325098032 25.0 10.0 +1 17 18 1112.1246666659 25.0 10.0 +1 18 19 1111.2168235286 25.0 10.0 +1 19 20 1110.3089803913 25.0 10.0 +1 20 21 1109.401137254 25.0 10.0 +1 21 22 1108.4932941167 25.0 10.0 +1 22 23 1107.5854509794 25.0 10.0 +1 23 24 1106.6776078421 25.0 10.0 +1 24 25 1105.7697647048 25.0 10.0 +1 25 26 1104.8619215675 25.0 10.0 +1 26 27 1103.9540784302 25.0 10.0 +1 27 28 1103.0462352929 25.0 10.0 +1 28 29 1102.1383921556 25.0 10.0 +1 29 30 1101.2305490183 25.0 10.0 +1 30 31 1100.322705881 25.0 10.0 +1 31 32 1099.4148627437 25.0 10.0 +1 32 33 1098.5070196064 25.0 10.0 +1 33 34 1097.5991764691 25.0 10.0 +1 34 35 1096.6913333318 25.0 10.0 +1 35 36 1095.7834901945 25.0 10.0 +1 36 37 1094.8756470572 25.0 10.0 +1 37 38 1093.9678039199 25.0 10.0 +1 38 39 1093.0599607826 25.0 10.0 +1 39 40 1092.1521176453 25.0 10.0 +1 40 41 1091.244274508 25.0 10.0 +1 41 42 1090.3364313707 25.0 10.0 +1 42 43 1089.4285882334 25.0 10.0 +1 43 44 1088.5207450961 25.0 10.0 +1 44 45 1087.6129019588 25.0 10.0 +1 45 46 1086.7050588215 25.0 10.0 +1 46 47 1085.7972156842 25.0 10.0 +1 47 48 1084.8893725469 25.0 10.0 +1 48 49 1083.9815294096 25.0 10.0 +1 49 50 1083.0736862723 25.0 10.0 +1 50 51 1082.165843135 25.0 10.0 +1 51 52 1081.2579999977 25.0 10.0 +1 52 53 1080.3501568604 25.0 10.0 +1 53 54 1079.4423137231 25.0 10.0 +1 54 55 1078.5344705858 25.0 10.0 +1 55 56 1077.6266274485 25.0 10.0 +1 56 57 1076.7187843112 25.0 10.0 +1 57 58 1075.8109411739 25.0 10.0 +1 58 59 1074.9030980366 25.0 10.0 +1 59 60 1073.9952548993 25.0 10.0 +1 60 61 1073.087411762 25.0 10.0 +1 61 62 1072.17956863 25.0 10.0 +1 62 63 1071.2717254927 
25.0 10.0 +1 63 64 1070.3638823554 25.0 10.0 +1 64 65 1069.4560392181 25.0 10.0 +1 65 66 1068.5481960808 25.0 10.0 +1 66 67 1067.6403529435 25.0 10.0 +1 67 68 1066.7325098062 25.0 10.0 +1 68 69 1065.8246666689 25.0 10.0 +1 69 70 1064.9168235316 25.0 10.0 +1 70 71 1064.0089803943 25.0 10.0 +1 71 72 1063.101137257 25.0 10.0 +1 72 73 1062.1932941197 25.0 10.0 +1 73 74 1061.2854509824 25.0 10.0 +1 74 75 1060.3776078451 25.0 10.0 +1 75 76 1059.4697647078 25.0 10.0 +1 76 77 1058.5619215705 25.0 10.0 +1 77 78 1057.6540784332 25.0 10.0 +1 78 79 1056.7462352959 25.0 10.0 +1 79 80 1055.8383921586 25.0 10.0 +1 80 81 1054.9305490213 25.0 10.0 +1 81 82 1054.022705884 25.0 10.0 +1 82 83 1053.1148627467 25.0 10.0 +1 83 84 1052.2070196094 25.0 10.0 +1 84 85 1051.2991764721 25.0 10.0 +1 85 86 1050.3913333348 25.0 10.0 +1 86 87 1049.4834901975 25.0 10.0 +1 87 88 1048.5756470602 25.0 10.0 +1 88 89 1047.6678039229 25.0 10.0 +1 89 90 1046.7599607856 25.0 10.0 +1 90 91 1045.8521176483 25.0 10.0 +1 91 92 1044.944274511 25.0 10.0 +1 92 93 1044.0364313737 25.0 10.0 +1 93 94 1043.1285882364 25.0 10.0 +1 94 95 1042.2207450991 25.0 10.0 +1 95 96 1041.3129019618 25.0 10.0 +1 96 97 1040.4050588245 25.0 10.0 +1 97 98 1039.4972156872 25.0 10.0 +1 98 99 1038.5893725499 25.0 10.0 +1 99 100 1037.6815294126 25.0 10.0 +1 100 101 1036.7736862753 25.0 10.0 +1 101 102 1035.865843138 25.0 10.0 +1 102 103 1034.9580000007 25.0 10.0 +1 103 104 1034.0501568634 25.0 10.0 +1 104 105 1033.1423137261 25.0 10.0 +1 105 106 1032.2344705888 25.0 10.0 +1 106 107 1031.3266274515 25.0 10.0 +1 107 108 1030.4187843142 25.0 10.0 +1 108 109 1029.5109411769 25.0 10.0 +1 109 110 1028.6030980396 25.0 10.0 +1 110 111 1027.6952549023 25.0 10.0 +1 111 112 1026.787411765 25.0 10.0 +1 112 113 1025.8795686277 25.0 10.0 +1 113 114 1024.9717254904 25.0 10.0 +1 114 115 1024.0638823531 25.0 10.0 +1 115 116 1023.1560392158 25.0 10.0 +1 116 117 1022.2481960785 25.0 10.0 +1 117 118 1021.3403529412 25.0 10.0 +1 118 119 1020.4325098039 25.0 
10.0 +1 119 120 1019.5246666666 25.0 10.0 +1 120 121 1018.6168235293 25.0 10.0 +1 121 122 1017.708980392 25.0 10.0 +1 122 123 1016.80113725 25.0 10.0 +1 123 124 1015.8932941127 25.0 10.0 +1 124 125 1014.9854509754 25.0 10.0 +1 125 126 1014.0776078381 25.0 10.0 +1 126 127 1013.1697647008 25.0 10.0 +1 127 128 1012.2619215635 25.0 10.0 +1 128 129 1011.3540784262 25.0 10.0 +1 129 130 1010.4462352889 25.0 10.0 +1 130 131 1009.5383921516 25.0 10.0 +1 131 132 1008.6305490143 25.0 10.0 +1 132 133 1007.722705877 25.0 10.0 +1 133 134 1006.8148627397 25.0 10.0 +1 134 135 1005.9070196024 25.0 10.0 +1 135 136 1004.9991764651 25.0 10.0 +1 136 137 1004.0913333278 25.0 10.0 +1 137 138 1003.1834901905 25.0 10.0 +1 138 139 1002.2756470532 25.0 10.0 +1 139 140 1001.3678039159 25.0 10.0 +1 140 141 1000.4599607786 25.0 10.0 +1 141 142 999.5521176413 25.0 10.0 +1 142 143 998.644274504 25.0 10.0 +1 143 144 997.7364313667 25.0 10.0 +1 144 145 996.8285882294 25.0 10.0 +1 145 146 995.9207450921 25.0 10.0 +1 146 147 995.0129019548 25.0 10.0 +1 147 148 994.1050588175 25.0 10.0 +1 148 149 993.1972156802 25.0 10.0 +1 149 150 992.2893725429 25.0 10.0 +1 150 151 991.3815294056 25.0 10.0 +1 151 152 990.4736862683 25.0 10.0 +1 152 153 989.565843131 25.0 10.0 +1 153 154 988.6579999937 25.0 10.0 +1 154 155 987.7501568564 25.0 10.0 +1 155 156 986.8423137191 25.0 10.0 +1 156 157 985.9344705818 25.0 10.0 +1 157 158 985.0266274445 25.0 10.0 +1 158 159 984.1187843072 25.0 10.0 +1 159 160 983.2109411699 25.0 10.0 +1 160 161 982.3030980326 25.0 10.0 +1 161 162 981.3952548953 25.0 10.0 +1 162 163 980.487411758 25.0 10.0 +1 163 164 979.5795686207 25.0 10.0 +1 164 165 978.6717254834 25.0 10.0 +1 165 166 977.7638823461 25.0 10.0 +1 166 167 976.8560392088 25.0 10.0 +1 167 168 975.9481960715 25.0 10.0 +1 168 169 975.0403529342 25.0 10.0 +1 169 170 974.1325097969 25.0 10.0 +1 170 171 973.2246666596 25.0 10.0 +1 171 172 972.3168235223 25.0 10.0 +1 172 173 971.408980385 25.0 10.0 +1 173 174 970.5011372477 25.0 10.0 
+1 174 175 969.5932941104 25.0 10.0 +1 175 176 968.6854509731 25.0 10.0 +1 176 177 967.7776078358 25.0 10.0 +1 177 178 966.8697646985 25.0 10.0 +1 178 179 965.9619215612 25.0 10.0 +1 179 180 965.0540784239 25.0 10.0 +1 180 181 964.1462352866 25.0 10.0 +1 181 182 963.2383921493 25.0 10.0 +1 182 183 962.330549012 25.0 10.0 +1 183 184 961.42270588 25.0 10.0 +1 184 185 960.5148627427 25.0 10.0 +1 185 186 959.6070196054 25.0 10.0 +1 186 187 958.6991764681 25.0 10.0 +1 187 188 957.7913333308 25.0 10.0 +1 188 189 956.8834901935 25.0 10.0 +1 189 190 955.9756470562 25.0 10.0 +1 190 191 955.0678039189 25.0 10.0 +1 191 192 954.1599607816 25.0 10.0 +1 192 193 953.2521176443 25.0 10.0 +1 193 194 952.344274507 25.0 10.0 +1 194 195 951.4364313697 25.0 10.0 +1 195 196 950.5285882324 25.0 10.0 +1 196 197 949.6207450951 25.0 10.0 +1 197 198 948.7129019578 25.0 10.0 +1 198 199 947.8050588205 25.0 10.0 +1 199 200 946.8972156832 25.0 10.0 +1 200 201 945.9893725459 25.0 10.0 +2 0 1 1152.558 25.0 10.0 +2 1 2 1151.6501568627 25.0 10.0 +2 2 3 1150.7423137254 25.0 10.0 +2 3 4 1149.8344705881 25.0 10.0 +2 4 5 1148.9266274508 25.0 10.0 +2 5 6 1148.0187843135 25.0 10.0 +2 6 7 1147.1109411762 25.0 10.0 +2 7 8 1146.2030980389 25.0 10.0 +2 8 9 1145.2952549016 25.0 10.0 +2 9 10 1144.3874117643 25.0 10.0 +2 10 11 1143.479568627 25.0 10.0 +2 11 12 1142.5717254897 25.0 10.0 +2 12 13 1141.6638823524 25.0 10.0 +2 13 14 1140.7560392151 25.0 10.0 +2 14 15 1139.8481960778 25.0 10.0 +2 15 16 1138.9403529405 25.0 10.0 +2 16 17 1138.0325098032 25.0 10.0 +2 17 18 1137.1246666659 25.0 10.0 +2 18 19 1136.2168235286 25.0 10.0 +2 19 20 1135.3089803913 25.0 10.0 +2 20 21 1134.401137254 25.0 10.0 +2 21 22 1133.4932941167 25.0 10.0 +2 22 23 1132.5854509794 25.0 10.0 +2 23 24 1131.6776078421 25.0 10.0 +2 24 25 1130.7697647048 25.0 10.0 +2 25 26 1129.8619215675 25.0 10.0 +2 26 27 1128.9540784302 25.0 10.0 +2 27 28 1128.0462352929 25.0 10.0 +2 28 29 1127.1383921556 25.0 10.0 +2 29 30 1126.2305490183 25.0 10.0 +2 30 31 
1125.322705881 25.0 10.0 +2 31 32 1124.4148627437 25.0 10.0 +2 32 33 1123.5070196064 25.0 10.0 +2 33 34 1122.5991764691 25.0 10.0 +2 34 35 1121.6913333318 25.0 10.0 +2 35 36 1120.7834901945 25.0 10.0 +2 36 37 1119.8756470572 25.0 10.0 +2 37 38 1118.9678039199 25.0 10.0 +2 38 39 1118.0599607826 25.0 10.0 +2 39 40 1117.1521176453 25.0 10.0 +2 40 41 1116.244274508 25.0 10.0 +2 41 42 1115.3364313707 25.0 10.0 +2 42 43 1114.4285882334 25.0 10.0 +2 43 44 1113.5207450961 25.0 10.0 +2 44 45 1112.6129019588 25.0 10.0 +2 45 46 1111.7050588215 25.0 10.0 +2 46 47 1110.7972156842 25.0 10.0 +2 47 48 1109.8893725469 25.0 10.0 +2 48 49 1108.9815294096 25.0 10.0 +2 49 50 1108.0736862723 25.0 10.0 +2 50 51 1107.165843135 25.0 10.0 +2 51 52 1106.2579999977 25.0 10.0 +2 52 53 1105.3501568604 25.0 10.0 +2 53 54 1104.4423137231 25.0 10.0 +2 54 55 1103.5344705858 25.0 10.0 +2 55 56 1102.6266274485 25.0 10.0 +2 56 57 1101.7187843112 25.0 10.0 +2 57 58 1100.8109411739 25.0 10.0 +2 58 59 1099.9030980366 25.0 10.0 +2 59 60 1098.9952548993 25.0 10.0 +2 60 61 1098.087411762 25.0 10.0 +2 61 62 1097.17956863 25.0 10.0 +2 62 63 1096.2717254927 25.0 10.0 +2 63 64 1095.3638823554 25.0 10.0 +2 64 65 1094.4560392181 25.0 10.0 +2 65 66 1093.5481960808 25.0 10.0 +2 66 67 1092.6403529435 25.0 10.0 +2 67 68 1091.7325098062 25.0 10.0 +2 68 69 1090.8246666689 25.0 10.0 +2 69 70 1089.9168235316 25.0 10.0 +2 70 71 1089.0089803943 25.0 10.0 +2 71 72 1088.101137257 25.0 10.0 +2 72 73 1087.1932941197 25.0 10.0 +2 73 74 1086.2854509824 25.0 10.0 +2 74 75 1085.3776078451 25.0 10.0 +2 75 76 1084.4697647078 25.0 10.0 +2 76 77 1083.5619215705 25.0 10.0 +2 77 78 1082.6540784332 25.0 10.0 +2 78 79 1081.7462352959 25.0 10.0 +2 79 80 1080.8383921586 25.0 10.0 +2 80 81 1079.9305490213 25.0 10.0 +2 81 82 1079.022705884 25.0 10.0 +2 82 83 1078.1148627467 25.0 10.0 +2 83 84 1077.2070196094 25.0 10.0 +2 84 85 1076.2991764721 25.0 10.0 +2 85 86 1075.3913333348 25.0 10.0 +2 86 87 1074.4834901975 25.0 10.0 +2 87 88 
1073.5756470602 25.0 10.0 +2 88 89 1072.6678039229 25.0 10.0 +2 89 90 1071.7599607856 25.0 10.0 +2 90 91 1070.8521176483 25.0 10.0 +2 91 92 1069.944274511 25.0 10.0 +2 92 93 1069.0364313737 25.0 10.0 +2 93 94 1068.1285882364 25.0 10.0 +2 94 95 1067.2207450991 25.0 10.0 +2 95 96 1066.3129019618 25.0 10.0 +2 96 97 1065.4050588245 25.0 10.0 +2 97 98 1064.4972156872 25.0 10.0 +2 98 99 1063.5893725499 25.0 10.0 +2 99 100 1062.6815294126 25.0 10.0 +2 100 101 1061.7736862753 25.0 10.0 +2 101 102 1060.865843138 25.0 10.0 +2 102 103 1059.9580000007 25.0 10.0 +2 103 104 1059.0501568634 25.0 10.0 +2 104 105 1058.1423137261 25.0 10.0 +2 105 106 1057.2344705888 25.0 10.0 +2 106 107 1056.3266274515 25.0 10.0 +2 107 108 1055.4187843142 25.0 10.0 +2 108 109 1054.5109411769 25.0 10.0 +2 109 110 1053.6030980396 25.0 10.0 +2 110 111 1052.6952549023 25.0 10.0 +2 111 112 1051.787411765 25.0 10.0 +2 112 113 1050.8795686277 25.0 10.0 +2 113 114 1049.9717254904 25.0 10.0 +2 114 115 1049.0638823531 25.0 10.0 +2 115 116 1048.1560392158 25.0 10.0 +2 116 117 1047.2481960785 25.0 10.0 +2 117 118 1046.3403529412 25.0 10.0 +2 118 119 1045.4325098039 25.0 10.0 +2 119 120 1044.5246666666 25.0 10.0 +2 120 121 1043.6168235293 25.0 10.0 +2 121 122 1042.708980392 25.0 10.0 +2 122 123 1041.80113725 25.0 10.0 +2 123 124 1040.8932941127 25.0 10.0 +2 124 125 1039.9854509754 25.0 10.0 +2 125 126 1039.0776078381 25.0 10.0 +2 126 127 1038.1697647008 25.0 10.0 +2 127 128 1037.2619215635 25.0 10.0 +2 128 129 1036.3540784262 25.0 10.0 +2 129 130 1035.4462352889 25.0 10.0 +2 130 131 1034.5383921516 25.0 10.0 +2 131 132 1033.6305490143 25.0 10.0 +2 132 133 1032.722705877 25.0 10.0 +2 133 134 1031.8148627397 25.0 10.0 +2 134 135 1030.9070196024 25.0 10.0 +2 135 136 1029.9991764651 25.0 10.0 +2 136 137 1029.0913333278 25.0 10.0 +2 137 138 1028.1834901905 25.0 10.0 +2 138 139 1027.2756470532 25.0 10.0 +2 139 140 1026.3678039159 25.0 10.0 +2 140 141 1025.4599607786 25.0 10.0 +2 141 142 1024.5521176413 25.0 10.0 +2 
142 143 1023.644274504 25.0 10.0 +2 143 144 1022.7364313667 25.0 10.0 +2 144 145 1021.8285882294 25.0 10.0 +2 145 146 1020.9207450921 25.0 10.0 +2 146 147 1020.0129019548 25.0 10.0 +2 147 148 1019.1050588175 25.0 10.0 +2 148 149 1018.1972156802 25.0 10.0 +2 149 150 1017.2893725429 25.0 10.0 +2 150 151 1016.3815294056 25.0 10.0 +2 151 152 1015.4736862683 25.0 10.0 +2 152 153 1014.565843131 25.0 10.0 +2 153 154 1013.6579999937 25.0 10.0 +2 154 155 1012.7501568564 25.0 10.0 +2 155 156 1011.8423137191 25.0 10.0 +2 156 157 1010.9344705818 25.0 10.0 +2 157 158 1010.0266274445 25.0 10.0 +2 158 159 1009.1187843072 25.0 10.0 +2 159 160 1008.2109411699 25.0 10.0 +2 160 161 1007.3030980326 25.0 10.0 +2 161 162 1006.3952548953 25.0 10.0 +2 162 163 1005.487411758 25.0 10.0 +2 163 164 1004.5795686207 25.0 10.0 +2 164 165 1003.6717254834 25.0 10.0 +2 165 166 1002.7638823461 25.0 10.0 +2 166 167 1001.8560392088 25.0 10.0 +2 167 168 1000.9481960715 25.0 10.0 +2 168 169 1000.0403529342 25.0 10.0 +2 169 170 999.1325097969 25.0 10.0 +2 170 171 998.2246666596 25.0 10.0 +2 171 172 997.3168235223 25.0 10.0 +2 172 173 996.408980385 25.0 10.0 +2 173 174 995.5011372477 25.0 10.0 +2 174 175 994.5932941104 25.0 10.0 +2 175 176 993.6854509731 25.0 10.0 +2 176 177 992.7776078358 25.0 10.0 +2 177 178 991.8697646985 25.0 10.0 +2 178 179 990.9619215612 25.0 10.0 +2 179 180 990.0540784239 25.0 10.0 +2 180 181 989.1462352866 25.0 10.0 +2 181 182 988.2383921493 25.0 10.0 +2 182 183 987.330549012 25.0 10.0 +2 183 184 986.42270588 25.0 10.0 +2 184 185 985.5148627427 25.0 10.0 +2 185 186 984.6070196054 25.0 10.0 +2 186 187 983.6991764681 25.0 10.0 +2 187 188 982.7913333308 25.0 10.0 +2 188 189 981.8834901935 25.0 10.0 +2 189 190 980.9756470562 25.0 10.0 +2 190 191 980.0678039189 25.0 10.0 +2 191 192 979.1599607816 25.0 10.0 +2 192 193 978.2521176443 25.0 10.0 +2 193 194 977.344274507 25.0 10.0 +2 194 195 976.4364313697 25.0 10.0 +2 195 196 975.5285882324 25.0 10.0 +2 196 197 974.6207450951 25.0 10.0 +2 
197 198 973.7129019578 25.0 10.0 +2 198 199 972.8050588205 25.0 10.0 +2 199 200 971.8972156832 25.0 10.0 +3 0 1 1177.558 25.0 10.0 +3 1 2 1176.6501568627 25.0 10.0 +3 2 3 1175.7423137254 25.0 10.0 +3 3 4 1174.8344705881 25.0 10.0 +3 4 5 1173.9266274508 25.0 10.0 +3 5 6 1173.0187843135 25.0 10.0 +3 6 7 1172.1109411762 25.0 10.0 +3 7 8 1171.2030980389 25.0 10.0 +3 8 9 1170.2952549016 25.0 10.0 +3 9 10 1169.3874117643 25.0 10.0 +3 10 11 1168.479568627 25.0 10.0 +3 11 12 1167.5717254897 25.0 10.0 +3 12 13 1166.6638823524 25.0 10.0 +3 13 14 1165.7560392151 25.0 10.0 +3 14 15 1164.8481960778 25.0 10.0 +3 15 16 1163.9403529405 25.0 10.0 +3 16 17 1163.0325098032 25.0 10.0 +3 17 18 1162.1246666659 25.0 10.0 +3 18 19 1161.2168235286 25.0 10.0 +3 19 20 1160.3089803913 25.0 10.0 +3 20 21 1159.401137254 25.0 10.0 +3 21 22 1158.4932941167 25.0 10.0 +3 22 23 1157.5854509794 25.0 10.0 +3 23 24 1156.6776078421 25.0 10.0 +3 24 25 1155.7697647048 25.0 10.0 +3 25 26 1154.8619215675 25.0 10.0 +3 26 27 1153.9540784302 25.0 10.0 +3 27 28 1153.0462352929 25.0 10.0 +3 28 29 1152.1383921556 25.0 10.0 +3 29 30 1151.2305490183 25.0 10.0 +3 30 31 1150.322705881 25.0 10.0 +3 31 32 1149.4148627437 25.0 10.0 +3 32 33 1148.5070196064 25.0 10.0 +3 33 34 1147.5991764691 25.0 10.0 +3 34 35 1146.6913333318 25.0 10.0 +3 35 36 1145.7834901945 25.0 10.0 +3 36 37 1144.8756470572 25.0 10.0 +3 37 38 1143.9678039199 25.0 10.0 +3 38 39 1143.0599607826 25.0 10.0 +3 39 40 1142.1521176453 25.0 10.0 +3 40 41 1141.244274508 25.0 10.0 +3 41 42 1140.3364313707 25.0 10.0 +3 42 43 1139.4285882334 25.0 10.0 +3 43 44 1138.5207450961 25.0 10.0 +3 44 45 1137.6129019588 25.0 10.0 +3 45 46 1136.7050588215 25.0 10.0 +3 46 47 1135.7972156842 25.0 10.0 +3 47 48 1134.8893725469 25.0 10.0 +3 48 49 1133.9815294096 25.0 10.0 +3 49 50 1133.0736862723 25.0 10.0 +3 50 51 1132.165843135 25.0 10.0 +3 51 52 1131.2579999977 25.0 10.0 +3 52 53 1130.3501568604 25.0 10.0 +3 53 54 1129.4423137231 25.0 10.0 +3 54 55 1128.5344705858 25.0 10.0 
+3 55 56 1127.6266274485 25.0 10.0 +3 56 57 1126.7187843112 25.0 10.0 +3 57 58 1125.8109411739 25.0 10.0 +3 58 59 1124.9030980366 25.0 10.0 +3 59 60 1123.9952548993 25.0 10.0 +3 60 61 1123.087411762 25.0 10.0 +3 61 62 1122.17956863 25.0 10.0 +3 62 63 1121.2717254927 25.0 10.0 +3 63 64 1120.3638823554 25.0 10.0 +3 64 65 1119.4560392181 25.0 10.0 +3 65 66 1118.5481960808 25.0 10.0 +3 66 67 1117.6403529435 25.0 10.0 +3 67 68 1116.7325098062 25.0 10.0 +3 68 69 1115.8246666689 25.0 10.0 +3 69 70 1114.9168235316 25.0 10.0 +3 70 71 1114.0089803943 25.0 10.0 +3 71 72 1113.101137257 25.0 10.0 +3 72 73 1112.1932941197 25.0 10.0 +3 73 74 1111.2854509824 25.0 10.0 +3 74 75 1110.3776078451 25.0 10.0 +3 75 76 1109.4697647078 25.0 10.0 +3 76 77 1108.5619215705 25.0 10.0 +3 77 78 1107.6540784332 25.0 10.0 +3 78 79 1106.7462352959 25.0 10.0 +3 79 80 1105.8383921586 25.0 10.0 +3 80 81 1104.9305490213 25.0 10.0 +3 81 82 1104.022705884 25.0 10.0 +3 82 83 1103.1148627467 25.0 10.0 +3 83 84 1102.2070196094 25.0 10.0 +3 84 85 1101.2991764721 25.0 10.0 +3 85 86 1100.3913333348 25.0 10.0 +3 86 87 1099.4834901975 25.0 10.0 +3 87 88 1098.5756470602 25.0 10.0 +3 88 89 1097.6678039229 25.0 10.0 +3 89 90 1096.7599607856 25.0 10.0 +3 90 91 1095.8521176483 25.0 10.0 +3 91 92 1094.944274511 25.0 10.0 +3 92 93 1094.0364313737 25.0 10.0 +3 93 94 1093.1285882364 25.0 10.0 +3 94 95 1092.2207450991 25.0 10.0 +3 95 96 1091.3129019618 25.0 10.0 +3 96 97 1090.4050588245 25.0 10.0 +3 97 98 1089.4972156872 25.0 10.0 +3 98 99 1088.5893725499 25.0 10.0 +3 99 100 1087.6815294126 25.0 10.0 +3 100 101 1086.7736862753 25.0 10.0 +3 101 102 1085.865843138 25.0 10.0 +3 102 103 1084.9580000007 25.0 10.0 +3 103 104 1084.0501568634 25.0 10.0 +3 104 105 1083.1423137261 25.0 10.0 +3 105 106 1082.2344705888 25.0 10.0 +3 106 107 1081.3266274515 25.0 10.0 +3 107 108 1080.4187843142 25.0 10.0 +3 108 109 1079.5109411769 25.0 10.0 +3 109 110 1078.6030980396 25.0 10.0 +3 110 111 1077.6952549023 25.0 10.0 +3 111 112 
1076.787411765 25.0 10.0 +3 112 113 1075.8795686277 25.0 10.0 +3 113 114 1074.9717254904 25.0 10.0 +3 114 115 1074.0638823531 25.0 10.0 +3 115 116 1073.1560392158 25.0 10.0 +3 116 117 1072.2481960785 25.0 10.0 +3 117 118 1071.3403529412 25.0 10.0 +3 118 119 1070.4325098039 25.0 10.0 +3 119 120 1069.5246666666 25.0 10.0 +3 120 121 1068.6168235293 25.0 10.0 +3 121 122 1067.708980392 25.0 10.0 +3 122 123 1066.80113725 25.0 10.0 +3 123 124 1065.8932941127 25.0 10.0 +3 124 125 1064.9854509754 25.0 10.0 +3 125 126 1064.0776078381 25.0 10.0 +3 126 127 1063.1697647008 25.0 10.0 +3 127 128 1062.2619215635 25.0 10.0 +3 128 129 1061.3540784262 25.0 10.0 +3 129 130 1060.4462352889 25.0 10.0 +3 130 131 1059.5383921516 25.0 10.0 +3 131 132 1058.6305490143 25.0 10.0 +3 132 133 1057.722705877 25.0 10.0 +3 133 134 1056.8148627397 25.0 10.0 +3 134 135 1055.9070196024 25.0 10.0 +3 135 136 1054.9991764651 25.0 10.0 +3 136 137 1054.0913333278 25.0 10.0 +3 137 138 1053.1834901905 25.0 10.0 +3 138 139 1052.2756470532 25.0 10.0 +3 139 140 1051.3678039159 25.0 10.0 +3 140 141 1050.4599607786 25.0 10.0 +3 141 142 1049.5521176413 25.0 10.0 +3 142 143 1048.644274504 25.0 10.0 +3 143 144 1047.7364313667 25.0 10.0 +3 144 145 1046.8285882294 25.0 10.0 +3 145 146 1045.9207450921 25.0 10.0 +3 146 147 1045.0129019548 25.0 10.0 +3 147 148 1044.1050588175 25.0 10.0 +3 148 149 1043.1972156802 25.0 10.0 +3 149 150 1042.2893725429 25.0 10.0 +3 150 151 1041.3815294056 25.0 10.0 +3 151 152 1040.4736862683 25.0 10.0 +3 152 153 1039.565843131 25.0 10.0 +3 153 154 1038.6579999937 25.0 10.0 +3 154 155 1037.7501568564 25.0 10.0 +3 155 156 1036.8423137191 25.0 10.0 +3 156 157 1035.9344705818 25.0 10.0 +3 157 158 1035.0266274445 25.0 10.0 +3 158 159 1034.1187843072 25.0 10.0 +3 159 160 1033.2109411699 25.0 10.0 +3 160 161 1032.3030980326 25.0 10.0 +3 161 162 1031.3952548953 25.0 10.0 +3 162 163 1030.487411758 25.0 10.0 +3 163 164 1029.5795686207 25.0 10.0 +3 164 165 1028.6717254834 25.0 10.0 +3 165 166 
1027.7638823461 25.0 10.0 +3 166 167 1026.8560392088 25.0 10.0 +3 167 168 1025.9481960715 25.0 10.0 +3 168 169 1025.0403529342 25.0 10.0 +3 169 170 1024.1325097969 25.0 10.0 +3 170 171 1023.2246666596 25.0 10.0 +3 171 172 1022.3168235223 25.0 10.0 +3 172 173 1021.408980385 25.0 10.0 +3 173 174 1020.5011372477 25.0 10.0 +3 174 175 1019.5932941104 25.0 10.0 +3 175 176 1018.6854509731 25.0 10.0 +3 176 177 1017.7776078358 25.0 10.0 +3 177 178 1016.8697646985 25.0 10.0 +3 178 179 1015.9619215612 25.0 10.0 +3 179 180 1015.0540784239 25.0 10.0 +3 180 181 1014.1462352866 25.0 10.0 +3 181 182 1013.2383921493 25.0 10.0 +3 182 183 1012.330549012 25.0 10.0 +3 183 184 1011.42270588 25.0 10.0 +3 184 185 1010.5148627427 25.0 10.0 +3 185 186 1009.6070196054 25.0 10.0 +3 186 187 1008.6991764681 25.0 10.0 +3 187 188 1007.7913333308 25.0 10.0 +3 188 189 1006.8834901935 25.0 10.0 +3 189 190 1005.9756470562 25.0 10.0 +3 190 191 1005.0678039189 25.0 10.0 +3 191 192 1004.1599607816 25.0 10.0 +3 192 193 1003.2521176443 25.0 10.0 +3 193 194 1002.344274507 25.0 10.0 +3 194 195 1001.4364313697 25.0 10.0 +3 195 196 1000.5285882324 25.0 10.0 +3 196 197 999.6207450951 25.0 10.0 +3 197 198 998.7129019578 25.0 10.0 +3 198 199 997.8050588205 25.0 10.0 +3 199 200 996.8972156832 25.0 10.0 +3 200 201 995.9893725459 25.0 10.0 +4 0 1 1202.558 25.0 10.0 +4 1 2 1201.6501568627 25.0 10.0 +4 2 3 1200.7423137254 25.0 10.0 +4 3 4 1199.8344705881 25.0 10.0 +4 4 5 1198.9266274508 25.0 10.0 +4 5 6 1198.0187843135 25.0 10.0 +4 6 7 1197.1109411762 25.0 10.0 +4 7 8 1196.2030980389 25.0 10.0 +4 8 9 1195.2952549016 25.0 10.0 +4 9 10 1194.3874117643 25.0 10.0 +4 10 11 1193.479568627 25.0 10.0 +4 11 12 1192.5717254897 25.0 10.0 +4 12 13 1191.6638823524 25.0 10.0 +4 13 14 1190.7560392151 25.0 10.0 +4 14 15 1189.8481960778 25.0 10.0 +4 15 16 1188.9403529405 25.0 10.0 +4 16 17 1188.0325098032 25.0 10.0 +4 17 18 1187.1246666659 25.0 10.0 +4 18 19 1186.2168235286 25.0 10.0 +4 19 20 1185.3089803913 25.0 10.0 +4 20 21 
1184.401137254 25.0 10.0 +4 21 22 1183.4932941167 25.0 10.0 +4 22 23 1182.5854509794 25.0 10.0 +4 23 24 1181.6776078421 25.0 10.0 +4 24 25 1180.7697647048 25.0 10.0 +4 25 26 1179.8619215675 25.0 10.0 +4 26 27 1178.9540784302 25.0 10.0 +4 27 28 1178.0462352929 25.0 10.0 +4 28 29 1177.1383921556 25.0 10.0 +4 29 30 1176.2305490183 25.0 10.0 +4 30 31 1175.322705881 25.0 10.0 +4 31 32 1174.4148627437 25.0 10.0 +4 32 33 1173.5070196064 25.0 10.0 +4 33 34 1172.5991764691 25.0 10.0 +4 34 35 1171.6913333318 25.0 10.0 +4 35 36 1170.7834901945 25.0 10.0 +4 36 37 1169.8756470572 25.0 10.0 +4 37 38 1168.9678039199 25.0 10.0 +4 38 39 1168.0599607826 25.0 10.0 +4 39 40 1167.1521176453 25.0 10.0 +4 40 41 1166.244274508 25.0 10.0 +4 41 42 1165.3364313707 25.0 10.0 +4 42 43 1164.4285882334 25.0 10.0 +4 43 44 1163.5207450961 25.0 10.0 +4 44 45 1162.6129019588 25.0 10.0 +4 45 46 1161.7050588215 25.0 10.0 +4 46 47 1160.7972156842 25.0 10.0 +4 47 48 1159.8893725469 25.0 10.0 +4 48 49 1158.9815294096 25.0 10.0 +4 49 50 1158.0736862723 25.0 10.0 +4 50 51 1157.165843135 25.0 10.0 +4 51 52 1156.2579999977 25.0 10.0 +4 52 53 1155.3501568604 25.0 10.0 +4 53 54 1154.4423137231 25.0 10.0 +4 54 55 1153.5344705858 25.0 10.0 +4 55 56 1152.6266274485 25.0 10.0 +4 56 57 1151.7187843112 25.0 10.0 +4 57 58 1150.8109411739 25.0 10.0 +4 58 59 1149.9030980366 25.0 10.0 +4 59 60 1148.9952548993 25.0 10.0 +4 60 61 1148.087411762 25.0 10.0 +4 61 62 1147.17956863 25.0 10.0 +4 62 63 1146.2717254927 25.0 10.0 +4 63 64 1145.3638823554 25.0 10.0 +4 64 65 1144.4560392181 25.0 10.0 +4 65 66 1143.5481960808 25.0 10.0 +4 66 67 1142.6403529435 25.0 10.0 +4 67 68 1141.7325098062 25.0 10.0 +4 68 69 1140.8246666689 25.0 10.0 +4 69 70 1139.9168235316 25.0 10.0 +4 70 71 1139.0089803943 25.0 10.0 +4 71 72 1138.101137257 25.0 10.0 +4 72 73 1137.1932941197 25.0 10.0 +4 73 74 1136.2854509824 25.0 10.0 +4 74 75 1135.3776078451 25.0 10.0 +4 75 76 1134.4697647078 25.0 10.0 +4 76 77 1133.5619215705 25.0 10.0 +4 77 78 
1132.6540784332 25.0 10.0 +4 78 79 1131.7462352959 25.0 10.0 +4 79 80 1130.8383921586 25.0 10.0 +4 80 81 1129.9305490213 25.0 10.0 +4 81 82 1129.022705884 25.0 10.0 +4 82 83 1128.1148627467 25.0 10.0 +4 83 84 1127.2070196094 25.0 10.0 +4 84 85 1126.2991764721 25.0 10.0 +4 85 86 1125.3913333348 25.0 10.0 +4 86 87 1124.4834901975 25.0 10.0 +4 87 88 1123.5756470602 25.0 10.0 +4 88 89 1122.6678039229 25.0 10.0 +4 89 90 1121.7599607856 25.0 10.0 +4 90 91 1120.8521176483 25.0 10.0 +4 91 92 1119.944274511 25.0 10.0 +4 92 93 1119.0364313737 25.0 10.0 +4 93 94 1118.1285882364 25.0 10.0 +4 94 95 1117.2207450991 25.0 10.0 +4 95 96 1116.3129019618 25.0 10.0 +4 96 97 1115.4050588245 25.0 10.0 +4 97 98 1114.4972156872 25.0 10.0 +4 98 99 1113.5893725499 25.0 10.0 +4 99 100 1112.6815294126 25.0 10.0 +4 100 101 1111.7736862753 25.0 10.0 +4 101 102 1110.865843138 25.0 10.0 +4 102 103 1109.9580000007 25.0 10.0 +4 103 104 1109.0501568634 25.0 10.0 +4 104 105 1108.1423137261 25.0 10.0 +4 105 106 1107.2344705888 25.0 10.0 +4 106 107 1106.3266274515 25.0 10.0 +4 107 108 1105.4187843142 25.0 10.0 +4 108 109 1104.5109411769 25.0 10.0 +4 109 110 1103.6030980396 25.0 10.0 +4 110 111 1102.6952549023 25.0 10.0 +4 111 112 1101.787411765 25.0 10.0 +4 112 113 1100.8795686277 25.0 10.0 +4 113 114 1099.9717254904 25.0 10.0 +4 114 115 1099.0638823531 25.0 10.0 +4 115 116 1098.1560392158 25.0 10.0 +4 116 117 1097.2481960785 25.0 10.0 +4 117 118 1096.3403529412 25.0 10.0 +4 118 119 1095.4325098039 25.0 10.0 +4 119 120 1094.5246666666 25.0 10.0 +4 120 121 1093.6168235293 25.0 10.0 +4 121 122 1092.708980392 25.0 10.0 +4 122 123 1091.80113725 25.0 10.0 +4 123 124 1090.8932941127 25.0 10.0 +4 124 125 1089.9854509754 25.0 10.0 +4 125 126 1089.0776078381 25.0 10.0 +4 126 127 1088.1697647008 25.0 10.0 +4 127 128 1087.2619215635 25.0 10.0 +4 128 129 1086.3540784262 25.0 10.0 +4 129 130 1085.4462352889 25.0 10.0 +4 130 131 1084.5383921516 25.0 10.0 +4 131 132 1083.6305490143 25.0 10.0 +4 132 133 1082.722705877 
25.0 10.0 +4 133 134 1081.8148627397 25.0 10.0 +4 134 135 1080.9070196024 25.0 10.0 +4 135 136 1079.9991764651 25.0 10.0 +4 136 137 1079.0913333278 25.0 10.0 +4 137 138 1078.1834901905 25.0 10.0 +4 138 139 1077.2756470532 25.0 10.0 +4 139 140 1076.3678039159 25.0 10.0 +4 140 141 1075.4599607786 25.0 10.0 +4 141 142 1074.5521176413 25.0 10.0 +4 142 143 1073.644274504 25.0 10.0 +4 143 144 1072.7364313667 25.0 10.0 +4 144 145 1071.8285882294 25.0 10.0 +4 145 146 1070.9207450921 25.0 10.0 +4 146 147 1070.0129019548 25.0 10.0 +4 147 148 1069.1050588175 25.0 10.0 +4 148 149 1068.1972156802 25.0 10.0 +4 149 150 1067.2893725429 25.0 10.0 +4 150 151 1066.3815294056 25.0 10.0 +4 151 152 1065.4736862683 25.0 10.0 +4 152 153 1064.565843131 25.0 10.0 +4 153 154 1063.6579999937 25.0 10.0 +4 154 155 1062.7501568564 25.0 10.0 +4 155 156 1061.8423137191 25.0 10.0 +4 156 157 1060.9344705818 25.0 10.0 +4 157 158 1060.0266274445 25.0 10.0 +4 158 159 1059.1187843072 25.0 10.0 +4 159 160 1058.2109411699 25.0 10.0 +4 160 161 1057.3030980326 25.0 10.0 +4 161 162 1056.3952548953 25.0 10.0 +4 162 163 1055.487411758 25.0 10.0 +4 163 164 1054.5795686207 25.0 10.0 +4 164 165 1053.6717254834 25.0 10.0 +4 165 166 1052.7638823461 25.0 10.0 +4 166 167 1051.8560392088 25.0 10.0 +4 167 168 1050.9481960715 25.0 10.0 +4 168 169 1050.0403529342 25.0 10.0 +4 169 170 1049.1325097969 25.0 10.0 +4 170 171 1048.2246666596 25.0 10.0 +4 171 172 1047.3168235223 25.0 10.0 +4 172 173 1046.408980385 25.0 10.0 +4 173 174 1045.5011372477 25.0 10.0 +4 174 175 1044.5932941104 25.0 10.0 +4 175 176 1043.6854509731 25.0 10.0 +4 176 177 1042.7776078358 25.0 10.0 +4 177 178 1041.8697646985 25.0 10.0 +4 178 179 1040.9619215612 25.0 10.0 +4 179 180 1040.0540784239 25.0 10.0 +4 180 181 1039.1462352866 25.0 10.0 +4 181 182 1038.2383921493 25.0 10.0 +4 182 183 1037.330549012 25.0 10.0 +4 183 184 1036.42270588 25.0 10.0 +4 184 185 1035.5148627427 25.0 10.0 +4 185 186 1034.6070196054 25.0 10.0 +4 186 187 1033.6991764681 25.0 
10.0 +4 187 188 1032.7913333308 25.0 10.0 +4 188 189 1031.8834901935 25.0 10.0 +4 189 190 1030.9756470562 25.0 10.0 +4 190 191 1030.0678039189 25.0 10.0 +4 191 192 1029.1599607816 25.0 10.0 +4 192 193 1028.2521176443 25.0 10.0 +4 193 194 1027.344274507 25.0 10.0 +4 194 195 1026.4364313697 25.0 10.0 +4 195 196 1025.5285882324 25.0 10.0 +4 196 197 1024.6207450951 25.0 10.0 +4 197 198 1023.7129019578 25.0 10.0 +4 198 199 1022.8050588205 25.0 10.0 +4 199 200 1021.8972156832 25.0 10.0 +4 200 201 1020.9893725459 25.0 10.0 diff --git a/tests/data/synchropasef_tdf/frames.tsv b/tests/data/synchropasef_tdf/frames.tsv new file mode 100644 index 0000000..792ab1d --- /dev/null +++ b/tests/data/synchropasef_tdf/frames.tsv @@ -0,0 +1,31 @@ +Id Time Polarity ScanMode MsMsType TimsId MaxIntensity SummedIntensities NumScans NumPeaks MzCalibration T1 T2 TimsCalibration PropertyGroup AccumulationTime RampTime Pressure +1 0.649239 + 9 0 64 9608 22380878 918 339849 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +2 0.754762 + 9 9 807381 796 565177 918 6680 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +3 0.87014 + 9 9 828914 1014 595464 918 7300 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +4 0.985113 + 9 9 852190 902 615081 918 7645 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +5 1.099038 + 9 9 876521 754 707930 918 8723 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +6 1.206625 + 9 0 903694 11802 22093749 918 333488 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +7 1.321188 + 9 9 1696979 868 560818 918 6566 1 25.3746447717924 25.85769581691 1 1 99.953 99.953 2.69219928765415 +8 1.435212 + 9 9 1718265 1023 624752 918 7520 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +9 1.553013 + 9 9 1742123 893 644103 918 7896 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +10 1.668145 + 9 9 1767035 604 734712 918 9079 
1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +11 1.773772 + 9 0 1795220 11802 23231987 918 353225 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +12 1.89001 + 9 9 2630589 584 580882 918 6807 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +13 2.004614 + 9 9 2652511 1099 599078 918 7281 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +14 2.118491 + 9 9 2675725 1022 651814 918 7974 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +15 2.234585 + 9 9 2700860 1331 718188 918 8917 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +16 2.341645 + 9 0 2728609 14305 23845396 918 364200 1 25.3747080388505 25.8575495182923 1 1 99.953 99.953 2.69219730372168 +17 2.455272 + 9 9 3586919 1039 583917 918 6869 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +18 2.571329 + 9 9 3609024 1107 611420 918 7364 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +19 2.685878 + 9 9 3632551 727 633041 918 7835 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +20 2.800244 + 9 9 3657359 843 739810 918 9039 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +21 2.906669 + 9 0 3685359 20236 23908348 918 364509 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +22 3.021733 + 9 9 4543659 1129 574257 918 6640 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +23 3.136321 + 9 9 4565153 1287 614888 918 7477 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +24 3.250921 + 9 9 4589062 867 632434 918 7888 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +25 3.3672 + 9 9 4613967 584 746635 918 9193 1 25.3747769054014 25.8576310650414 1 1 99.953 99.953 2.6921953395395 +26 3.473245 + 9 0 4642458 11802 23807723 918 360480 1 25.3746198444182 25.8574854110363 1 1 99.953 99.953 2.69219320151607 +27 3.587991 + 9 9 
5492940 1660 594056 918 6804 1 25.3746198444182 25.8574854110363 1 1 99.953 99.953 2.69219320151607 +28 3.702125 + 9 9 5514965 1516 621941 918 7354 1 25.3746198444182 25.8574854110363 1 1 99.953 99.953 2.69219320151607 +29 3.818285 + 9 9 5538441 953 645371 918 7977 1 25.3746198444182 25.8574854110363 1 1 99.953 99.953 2.69219320151607 +30 3.932387 + 9 9 5563578 415 739569 918 9137 1 25.3746198444182 25.8574854110363 1 1 99.953 99.953 2.69219320151607 diff --git a/tests/data/synchropasef_tdf/global_metadata.tsv b/tests/data/synchropasef_tdf/global_metadata.tsv new file mode 100644 index 0000000..9581c9b --- /dev/null +++ b/tests/data/synchropasef_tdf/global_metadata.tsv @@ -0,0 +1,6 @@ +Key Value +OneOverK0AcqRangeLower 0.600000 +OneOverK0AcqRangeUpper 1.600000 +MzAcqRangeLower 50.000000 +MzAcqRangeUpper 1700.000000 +DigitizerNumSamples 434064 diff --git a/tests/test_window_parsing.rs b/tests/test_window_parsing.rs new file mode 100644 index 0000000..536f43e --- /dev/null +++ b/tests/test_window_parsing.rs @@ -0,0 +1,66 @@ +use ionmesh::ms::tdf::{FrameInfoBuilder, GroupingLevel}; + +#[test] +fn test_dia_pasef() { + let finfo_b = FrameInfoBuilder::from_dotd_path("tests/data/diapasef_tdf/data.d".into()); + let finfo = finfo_b.build(); + + assert!(finfo.is_ok()); + + let finfo = finfo.unwrap(); + + // The number of ids in `DiaFrameMsMsWindowGroups` + 1 bc 0 is not used + assert_eq!(finfo.groups.len(), 9); + + assert!(finfo.groups[0].is_none()); + for group in finfo.groups.iter().skip(1) { + assert!(group.is_some()); + } + + // Make sure the grouping is correctly assigned... for diaPASEF it should + // be `QuadWindowGroup` + match finfo.grouping_level { + GroupingLevel::QuadWindowGroup => {} + GroupingLevel::WindowGroup => { + assert!(false); + } + } + + // Make sure the grouping is correct. 
+ // For this diapasef file is 8 * 2 (8 window groups, 2 isolation windows per group) + assert_eq!(finfo.row_to_group.iter().max().unwrap(), &(8 * 2)); + + // println!("{:?}", finfo); + // assert!(false) +} + +#[test] +fn test_synchro_dia_pasef() { + let finfo_b = FrameInfoBuilder::from_dotd_path("tests/data/synchropasef_tdf/data.d".into()); + let finfo = finfo_b.build(); + + assert!(finfo.is_ok()); + + let finfo = finfo.unwrap(); + + // The number of ids in `DiaFrameMsMsWindowGroups` + 1, bc 0 is not used + assert_eq!(finfo.groups.len(), 5); + assert!(finfo.groups[0].is_none()); + for group in finfo.groups.iter().skip(1) { + assert!(group.is_some()); + } + + // Make sure the grouping is correctly assigned... for diaPASEF it should + // be `QuadWindowGroup` + match finfo.grouping_level { + GroupingLevel::QuadWindowGroup => { + assert!(false); + } + GroupingLevel::WindowGroup => {} + } + + // Make sure the grouping is correct. + assert_eq!(finfo.row_to_group.iter().max().unwrap(), &4); + + // println!("{:?}", finfo); +} From 896865ed20eb64ac1c84e490161b959bb849637d Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 6 Jul 2024 13:29:12 -0700 Subject: [PATCH 06/26] updated gh actions to build test data --- .github/workflows/checks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 30e705e..17f9154 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -18,5 +18,7 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --verbose + - name: Build Test Data + run: cd tests/data && bash build.bash - name: Run tests run: cargo test --verbose From f4078a2909dde3ada686dc8791a7b20b6a74f520 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sat, 6 Jul 2024 14:59:27 -0700 Subject: [PATCH 07/26] (wip) clippy suggestions --- src/aggregation/chromatograms.rs | 10 +++++++--- src/aggregation/dbscan.rs | 10 ++++------ src/aggregation/ms_denoise.rs | 15 +++++++-------- src/ms/frames.rs | 16 +++++----------- src/ms/tdf.rs | 20 ++++++++++---------- 5 files changed, 33 insertions(+), 38 deletions(-) diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index 8cc357f..4e25519 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -161,9 +161,13 @@ impl BTreeChromatogram { ((center_rt.unwrap_or(0.) - self.rt_bin_offset.unwrap()) / self.rt_binsize) as i32; let left_start = int_center - (NUM_LOCAL_CHROMATOGRAM_BINS / 2) as i32; - for i in 0..NUM_LOCAL_CHROMATOGRAM_BINS { + for (i, item) in chromatogram_arr + .iter_mut() + .enumerate() + .take(NUM_LOCAL_CHROMATOGRAM_BINS) + { let bin = left_start + i as i32; - chromatogram_arr[i] = *self.btree.get(&bin).unwrap_or(&0) as f32; + *item = *self.btree.get(&bin).unwrap_or(&0) as f32; } } @@ -198,7 +202,7 @@ impl + AddAssign + Default + AsPrimitive, const NBINS: u let mut mag_b = T::default(); for i in 0..NBINS { let other_index = i + other_vs_self_offset as usize; - if other_index >= other.chromatogram.len() || other_index < 0 { + if other_index >= other.chromatogram.len() { continue; } diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan.rs index c18f86c..4f036f6 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan.rs @@ -155,7 +155,7 @@ fn _dbscan< >( indexed_points: &'a T, prefiltered_peaks: &Vec, - quad_points: &Vec>, + quad_points: &[NDPoint], min_n: usize, min_intensity: u64, intensity_sorted_indices: &Vec<(usize, I)>, @@ -282,10 +282,8 @@ fn _dbscan< if local_neighbors.len() >= min_n && neighbor_intensity_total >= min_intensity { // Keep only the neighbors that are not already in a cluster - local_neighbors.retain(|i| match cluster_labels[**i] { - 
ClusterLabel::Cluster(_) => false, - _ => true, - }); + local_neighbors + .retain(|i| !matches!(cluster_labels[**i], ClusterLabel::Cluster(_))); // Keep only the neighbors that are within the max extension distance // It might be worth setting a different max extension distance for the mz and mobility dimensions. @@ -376,7 +374,7 @@ fn reassign_centroid< centroids: Vec, indexed_points: &'a I, centroid_converter: C, - elements: &Vec, + elements: &[T], def_aggregator: F, log_level: utils::LogLevel, expansion_factors: &[f32; N], diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 0fad03e..507e9c7 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -79,7 +79,7 @@ fn _sanity_check_framestats( frame_stats_end: FrameStats, frame_index: usize, ) { - let intensity_ratio = frame_stats_end.tot_intensity / frame_stats_end.tot_intensity; + let intensity_ratio = frame_stats_start.tot_intensity / frame_stats_end.tot_intensity; let peak_ratio = frame_stats_end.num_peaks as f64 / frame_stats_start.num_peaks as f64; trace!( @@ -354,10 +354,7 @@ pub fn read_all_ms1_denoising( let ims_converter = reader.get_scan_converter().unwrap(); let mz_converter = reader.get_tof_converter().unwrap(); - frames.retain(|frame| match frame.frame_type { - timsrust::FrameType::MS1 => true, - _ => false, - }); + frames.retain(|frame| matches!(frame.frame_type, timsrust::FrameType::MS1)); // let min_intensity = 100u64; // let min_n: usize = 3; @@ -394,9 +391,11 @@ pub fn read_all_dia_denoising( let mz_converter = reader.get_tof_converter().unwrap(); timer.stop(true); - frames.retain(|frame| match frame.frame_type { - timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF) => true, - _ => false, + frames.retain(|frame| { + matches!( + frame.frame_type, + timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF) + ) }); let denoiser = DIAFrameDenoiser { diff --git a/src/ms/frames.rs b/src/ms/frames.rs index daf8cc8..3018194 100644 --- 
a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -112,12 +112,12 @@ impl<'a> FrameSlice<'a> { let tof_indices = &frame.tof_indices[indprt_start..indptr_end]; let intensities = &frame.intensities[indprt_start..indptr_end]; debug_assert!(tof_indices.len() == intensities.len()); - debug_assert!(indptr_end - indprt_start == tof_indices.len() as usize); + debug_assert!(indptr_end - indprt_start == tof_indices.len()); #[cfg(debug_assertions)] { for i in 1..(scan_offsets.len() - 1) { debug_assert!(scan_offsets[i] <= scan_offsets[i + 1]); - debug_assert!((scan_offsets[i + 1] - scan_start) <= tof_indices.len() as usize); + debug_assert!((scan_offsets[i + 1] - scan_start) <= tof_indices.len()); } } @@ -129,7 +129,7 @@ impl<'a> FrameSlice<'a> { rt: frame.rt, frame_type: frame.frame_type, scan_start, - slice_window_info: slice_window_info, + slice_window_info, } } } @@ -200,7 +200,7 @@ impl DenseFrameWindow { } }; - let frame = DenseFrame::from_frame_window(&frame_window, ims_converter, mz_converter); + let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); DenseFrameWindow { frame, @@ -280,7 +280,7 @@ impl DenseFrame { info!("frame_window.scan_start: {}", frame_window.scan_start); } debug_assert!(ims >= 0.0); - expanded_scan_indices.extend(vec![ims; num_tofs as usize]); + expanded_scan_indices.extend(vec![ims; num_tofs]); last_scan_offset = *index_offset; } debug_assert!(last_scan_offset == frame_window.tof_indices.len()); @@ -316,12 +316,6 @@ impl DenseFrame { } } - fn concatenate(mut self, other: DenseFrame) -> DenseFrame { - self.raw_peaks.extend(other.raw_peaks); - self.sorted = None; - self - } - pub fn sort_by_mz(&mut self) { match self.sorted { Some(SortingOrder::Mz) => (), diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index af53611..0e2cf68 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,4 +1,4 @@ -use log::{debug, info, trace}; +use log::{debug, info}; use sqlx::Pool; use sqlx::{FromRow, Sqlite, SqlitePool}; @@ -67,14 +67,14 @@ impl 
ScanRange { } } -impl Into for ScanRange { - fn into(self) -> FrameMsMsWindowInfo { +impl From for FrameMsMsWindowInfo { + fn from(val: ScanRange) -> Self { FrameMsMsWindowInfo { - mz_start: self.iso_low, - mz_end: self.iso_high, - window_group_id: self.window_group_id.into(), - within_window_quad_group_id: self.within_window_quad_group_id.into(), - global_quad_row_id: self.row_id.into(), + mz_start: val.iso_low, + mz_end: val.iso_high, + window_group_id: val.window_group_id, + within_window_quad_group_id: val.within_window_quad_group_id, + global_quad_row_id: val.row_id, } } } @@ -197,7 +197,7 @@ impl DIAFrameInfo { let slice_w_info: MsMsFrameSliceWindowInfo = MsMsFrameSliceWindowInfo::SingleWindow(scan_range.clone().into()); let frame_slice = FrameSlice::slice_frame( - &frame, + frame, scan_range.scan_start, scan_range.scan_end, Some(slice_w_info), @@ -236,7 +236,7 @@ impl DIAFrameInfo { } GroupingLevel::QuadWindowGroup => { let frame_windows = self - .split_frame(&frame, group) + .split_frame(frame, group) .expect("Error splitting frame"); for frame_window in frame_windows { match &frame_window.slice_window_info { From d7f37ce07b01171569cecb676e759a7db799b7e8 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 7 Jul 2024 02:38:14 -0700 Subject: [PATCH 08/26] (broken,wip) Almost complete migration of dbscan to runner struct --- src/aggregation/aggregators.rs | 16 +- src/aggregation/{ => dbscan}/dbscan.rs | 285 ++++---------- src/aggregation/dbscan/denseframe_dbscan.rs | 68 ++++ src/aggregation/dbscan/mod.rs | 4 + src/aggregation/dbscan/runner.rs | 408 ++++++++++++++++++++ src/aggregation/dbscan/utils.rs | 64 +++ src/aggregation/ms_denoise.rs | 6 +- src/aggregation/tracing.rs | 17 +- src/ms/frames.rs | 232 ++++++++++- src/space/kdtree.rs | 4 +- src/space/quad.rs | 4 +- src/space/space_generics.rs | 27 +- 12 files changed, 873 insertions(+), 262 deletions(-) rename src/aggregation/{ => dbscan}/dbscan.rs (66%) create mode 100644 src/aggregation/dbscan/denseframe_dbscan.rs create mode 100644 src/aggregation/dbscan/mod.rs create mode 100644 src/aggregation/dbscan/runner.rs create mode 100644 src/aggregation/dbscan/utils.rs diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index fd8e8bd..7229df0 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,8 +1,7 @@ use crate::ms::frames::TimsPeak; use crate::space::space_generics::HasIntensity; use crate::utils; -use num::cast::AsPrimitive; -use std::ops::{Add, Div, Mul, Sub}; +use std::ops::Add; use rayon::prelude::*; @@ -23,7 +22,7 @@ pub enum ClusterLabel { /// R: The type of the aggregated point. /// S: The type of the aggregator. 
/// -pub trait ClusterAggregator { +pub trait ClusterAggregator: Send + Sync { fn add(&mut self, elem: &T); fn aggregate(&self) -> R; fn combine(self, other: Self) -> Self; @@ -69,19 +68,10 @@ impl ClusterAggregator for TimsPeakAggregator { } pub fn aggregate_clusters< - T: HasIntensity + Send + Clone + Copy, + T: HasIntensity + Send + Clone + Copy, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, - Z: AsPrimitive - + Send - + Sync - + Add - + PartialOrd - + Div - + Mul - + Default - + Sub, >( tot_clusters: u64, cluster_labels: Vec>, diff --git a/src/aggregation/dbscan.rs b/src/aggregation/dbscan/dbscan.rs similarity index 66% rename from src/aggregation/dbscan.rs rename to src/aggregation/dbscan/dbscan.rs index 4f036f6..ca11d34 100644 --- a/src/aggregation/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,25 +1,16 @@ -use std::collections::BTreeMap; -use std::ops::{Add, Div, Mul, Sub}; - -use crate::ms::frames::TimsPeak; -use crate::space::space_generics::NDPointConverter; -use crate::utils; -use crate::utils::within_distance_apply; - -use crate::aggregation::aggregators::{ - aggregate_clusters, ClusterAggregator, ClusterLabel, TimsPeakAggregator, +use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; +use crate::space::kdtree::RadiusKDTree; +use crate::space::space_generics::{ + HasIntensity, NDPoint, NDPointConverter, QueriableIndexedPoints, }; -use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; -use crate::ms::frames; -use crate::space::space_generics::{HasIntensity, IndexedPoints, NDPoint}; +use crate::utils; use indicatif::ProgressIterator; use log::{debug, info, trace}; - +use num::cast::AsPrimitive; use rayon::prelude::*; +use std::ops::Add; -use crate::space::kdtree::RadiusKDTree; - -use num::cast::AsPrimitive; +use crate::aggregation::dbscan::utils::FilterFunCache; /// Density-based spatial clustering of applications with noise (DBSCAN) /// @@ -58,99 
+49,65 @@ use num::cast::AsPrimitive; /// 3. Use an intensity threshold intead of a minimum number of neighbors. /// 4. There are ways to define the limits to the extension of a cluster. -impl HasIntensity for frames::TimsPeak { - fn intensity(&self) -> u32 { - self.intensity - } -} - -struct FilterFunCache<'a> { - cache: Vec>>, - filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, - tot_queries: u64, - cached_queries: u64, +// TODO: rename quad_points, since this no longer uses a quadtree. +// TODO: refactor to take a filter function instead of requiting +// a min intensity and an intensity trait. +// TODO: rename the pre-filtered... +// TODO: reimplement this a two-stage pass, where the first in parallel +// gets the neighbors and the second does the iterative aggregation. +// THERE BE DRAGONS in this function ... I am thinking about sane ways to +// refactor it to make it more readable and maintainable. + +struct DBScanTimers { + main: utils::ContextTimer, + filter_fun_cache_timer: utils::ContextTimer, + outer_loop_nn_timer: utils::ContextTimer, + inner_loop_nn_timer: utils::ContextTimer, + local_neighbor_filter_timer: utils::ContextTimer, + outer_intensity_calculation: utils::ContextTimer, + inner_intensity_calculation: utils::ContextTimer, } -impl<'a> FilterFunCache<'a> { - fn new(filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, capacity: usize) -> Self { +impl DBScanTimers { + fn new() -> Self { + let mut timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); + let mut filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); + let mut outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); + let mut inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); + let mut local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); + let mut outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); + let mut inner_intensity_calculation = 
timer.start_sub_timer("inner_intensity_calculation"); Self { - cache: vec![None; capacity], - filter_fun, - tot_queries: 0, - cached_queries: 0, + main: timer, + filter_fun_cache_timer, + outer_loop_nn_timer, + inner_loop_nn_timer, + local_neighbor_filter_timer, + outer_intensity_calculation, + inner_intensity_calculation, } } - fn get(&mut self, elem_idx: usize, reference_idx: usize) -> bool { - // Get the value if it exists, call the functon, insert it and - // return it if it doesn't. - self.tot_queries += 1; - - let out: bool = match self.cache[elem_idx] { - Some(ref map) => match map.get(&reference_idx) { - Some(x) => { - self.cached_queries += 1; - *x - } - None => { - let out: bool = (self.filter_fun)(&elem_idx, &reference_idx); - self.insert(elem_idx, reference_idx, out); - self.insert(reference_idx, elem_idx, out); - out - } - }, - None => { - let out = (self.filter_fun)(&elem_idx, &reference_idx); - self.insert(elem_idx, reference_idx, out); - self.insert(reference_idx, elem_idx, out); - out - } - }; - out - } - - fn insert(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { - match self.cache[elem_idx] { - Some(ref mut map) => { - _ = map.insert(reference_idx, value); - } - None => { - let mut map = BTreeMap::new(); - map.insert(reference_idx, value); - self.cache[elem_idx] = Some(map); - } + fn report_if_gt_us(self, min_time: f64) { + if self.timer.cumtime.as_micros() > min_time { + self.main.report(); + self.filter_fun_cache_timer.report(); + self.outer_loop_nn_timer.report(); + self.inner_loop_nn_timer.report(); + self.local_neighbor_filter_timer.report(); + self.outer_intensity_calculation.report(); + self.inner_intensity_calculation.report(); } } - - fn get_stats(&self) -> (u64, u64) { - (self.tot_queries, self.cached_queries) - } } -// TODO: rename quad_points, since this no longer uses a quadtree. -// TODO: refactor to take a filter function instead of requiting -// a min intensity and an intensity trait. 
-// TODO: rename the pre-filtered... -// TODO: reimplement this a two-stage pass, where the first in parallel -// gets the neighbors and the second does the iterative aggregation. - // THIS IS A BOTTLENECK FUNCTION fn _dbscan< 'a, const N: usize, C: NDPointConverter, - I: Div - + Add - + Mul - + Sub - + Default - + Copy - + PartialOrd - + AsPrimitive - + Send - + Sync, - E: Sync + HasIntensity, - T: IndexedPoints<'a, N, usize> + std::marker::Sync, + E: Sync + HasIntensity, + T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, FF: Fn(&E, &E) -> bool + Send + Sync + Copy, >( indexed_points: &'a T, @@ -158,7 +115,7 @@ fn _dbscan< quad_points: &[NDPoint], min_n: usize, min_intensity: u64, - intensity_sorted_indices: &Vec<(usize, I)>, + intensity_sorted_indices: &Vec<(usize, u64)>, filter_fun: Option, converter: C, progress: bool, @@ -169,15 +126,7 @@ fn _dbscan< let mut cluster_labels = vec![ClusterLabel::Unassigned; prefiltered_peaks.len()]; let mut cluster_id = 0; - - let mut timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); - - let mut filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); - let mut outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); - let mut inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); - let mut local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); - let mut outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); - let mut inner_intensity_calculation = timer.start_sub_timer("inner_intensity_calculation"); + let mut timers = DBScanTimers::new(); let usize_filterfun = |a: &usize, b: &usize| { filter_fun.expect("filter_fun should be Some")( @@ -188,9 +137,9 @@ fn _dbscan< let mut filterfun_cache = FilterFunCache::new(Box::new(&usize_filterfun), prefiltered_peaks.len()); let mut filterfun_with_cache = |elem_idx: usize, reference_idx: usize| { - filter_fun_cache_timer.reset_start(); + 
timers.filter_fun_cache_timer.reset_start(); let out = filterfun_cache.get(elem_idx, reference_idx); - filter_fun_cache_timer.stop(false); + timers.filter_fun_cache_timer.stop(false); out }; @@ -206,10 +155,10 @@ fn _dbscan< continue; } - outer_loop_nn_timer.reset_start(); + timers.outer_loop_nn_timer.reset_start(); let query_elems = converter.convert_to_bounds_query(&quad_points[point_index]); let mut neighbors = indexed_points.query_ndrange(&query_elems.0, query_elems.1); - outer_loop_nn_timer.stop(false); + timers.outer_loop_nn_timer.stop(false); if neighbors.len() < min_n { cluster_labels[point_index] = ClusterLabel::Noise; @@ -232,12 +181,12 @@ fn _dbscan< } // Q: Do I need to care about overflows here? - Sebastian - outer_intensity_calculation.reset_start(); + timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() .map(|i| prefiltered_peaks[**i].intensity().as_()) .sum::(); - outer_intensity_calculation.stop(false); + timers.outer_intensity_calculation.stop(false); if neighbor_intensity_total < min_intensity { cluster_labels[point_index] = ClusterLabel::Noise; @@ -261,24 +210,24 @@ fn _dbscan< cluster_labels[neighbor_index] = ClusterLabel::Cluster(cluster_id); - inner_loop_nn_timer.reset_start(); + timers.inner_loop_nn_timer.reset_start(); let inner_query_elems = converter.convert_to_bounds_query(&quad_points[*neighbor]); let mut local_neighbors = indexed_points.query_ndrange(&inner_query_elems.0, inner_query_elems.1); - inner_loop_nn_timer.stop(false); + timers.inner_loop_nn_timer.stop(false); if filter_fun.is_some() { local_neighbors.retain(|i| filterfun_with_cache(**i, point_index)) // .filter(|i| filter_fun.unwrap()(&prefiltered_peaks[**i], &query_peak)) } - inner_intensity_calculation.reset_start(); + timers.inner_intensity_calculation.reset_start(); let query_intensity = prefiltered_peaks[neighbor_index].intensity(); let neighbor_intensity_total = local_neighbors .iter() .map(|i| 
prefiltered_peaks[**i].intensity().as_()) .sum::(); - inner_intensity_calculation.stop(false); + timers.inner_intensity_calculation.stop(false); if local_neighbors.len() >= min_n && neighbor_intensity_total >= min_intensity { // Keep only the neighbors that are not already in a cluster @@ -287,7 +236,7 @@ fn _dbscan< // Keep only the neighbors that are within the max extension distance // It might be worth setting a different max extension distance for the mz and mobility dimensions. - local_neighbor_filter_timer.reset_start(); + timers.local_neighbor_filter_timer.reset_start(); local_neighbors.retain(|i| { let going_downhill = prefiltered_peaks[**i].intensity() <= query_intensity; @@ -310,14 +259,14 @@ fn _dbscan< going_downhill && within_distance }); - local_neighbor_filter_timer.stop(false); + timers.local_neighbor_filter_timer.stop(false); seed_set.extend(local_neighbors); } } } - let (tot_queries, cached_queries) = filterfun_cache.get_stats(); + let (tot_queries, cached_queries) = timers.filterfun_cache.get_stats(); if tot_queries > 1000 { let cache_hit_rate = cached_queries as f64 / tot_queries as f64; @@ -334,16 +283,8 @@ fn _dbscan< ); } - timer.stop(false); - if timer.cumtime.as_micros() > 1000000 { - timer.report(); - filter_fun_cache_timer.report(); - outer_loop_nn_timer.report(); - inner_loop_nn_timer.report(); - local_neighbor_filter_timer.report(); - outer_intensity_calculation.report(); - inner_intensity_calculation.report(); - } + timers.main.stop(false); + timers.report_if_gt_us(1000000); (cluster_id, cluster_labels) } @@ -355,21 +296,12 @@ fn _dbscan< fn reassign_centroid< 'a, const N: usize, - T: HasIntensity + Send + Clone + Copy, + T: HasIntensity + Send + Clone + Copy, C: NDPointConverter, - I: IndexedPoints<'a, N, usize> + std::marker::Sync, + I: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, - Z: AsPrimitive - + Send - + Sync - + Add - + PartialOrd - + Div - 
+ Mul - + Default - + Sub, >( centroids: Vec, indexed_points: &'a I, @@ -413,19 +345,9 @@ pub fn dbscan_generic< C2: NDPointConverter, R: Send, G: Sync + Send + ClusterAggregator, - T: HasIntensity + Send + Clone + Copy + Sync, + T: HasIntensity + Send + Clone + Copy + Sync, F: Fn() -> G + Send + Sync, const N: usize, - // Z is usually u32 or u64 - Z: AsPrimitive - + Send - + Sync - + Add - + PartialOrd - + Div - + Mul - + Default - + Sub, FF: Send + Sync + Fn(&T, &T) -> bool, >( converter: C, @@ -506,66 +428,3 @@ pub fn dbscan_generic< None => centroids, } } - -type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; -// bool> -pub fn dbscan_denseframe( - mut denseframe: frames::DenseFrame, - mz_scaling: f64, - max_mz_extension: f64, - ims_scaling: f32, - max_ims_extension: f32, - min_n: usize, - min_intensity: u64, -) -> frames::DenseFrame { - let out_frame_type: timsrust::FrameType = denseframe.frame_type; - let out_rt: f64 = denseframe.rt; - let out_index: usize = denseframe.index; - - let prefiltered_peaks = { - denseframe.sort_by_mz(); - - let keep_vector = within_distance_apply( - &denseframe.raw_peaks, - &|peak| peak.mz, - &mz_scaling, - &|i_right, i_left| (i_right - i_left) >= min_n, - ); - - // Filter the peaks and replace the raw peaks with the filtered peaks. 
- - denseframe - .raw_peaks - .clone() - .into_iter() - .zip(keep_vector) - .filter(|(_, b)| *b) - .map(|(peak, _)| peak) // Clone the TimsPeak - .collect::>() - }; - - let converter = DenseFrameConverter { - mz_scaling, - ims_scaling, - }; - let peak_vec: Vec = dbscan_generic( - converter, - prefiltered_peaks, - min_n, - min_intensity, - TimsPeakAggregator::default, - None::<&FFTimsPeak>, - None, - true, - &[max_mz_extension as f32, max_ims_extension], - None::, - ); - - frames::DenseFrame { - raw_peaks: peak_vec, - index: out_index, - rt: out_rt, - frame_type: out_frame_type, - sorted: None, - } -} diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs new file mode 100644 index 0000000..af0fa7e --- /dev/null +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -0,0 +1,68 @@ +use crate::aggregation::aggregators::TimsPeakAggregator; +use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; +use crate::aggregation::dbscan::dbscan::dbscan_generic; +use crate::ms::frames::{DenseFrame, TimsPeak}; +use crate::utils::within_distance_apply; + +type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; +// bool> +pub fn dbscan_denseframe( + mut denseframe: DenseFrame, + mz_scaling: f64, + max_mz_extension: f64, + ims_scaling: f32, + max_ims_extension: f32, + min_n: usize, + min_intensity: u64, +) -> DenseFrame { + let out_frame_type: timsrust::FrameType = denseframe.frame_type; + let out_rt: f64 = denseframe.rt; + let out_index: usize = denseframe.index; + + let prefiltered_peaks = { + denseframe.sort_by_mz(); + + let keep_vector = within_distance_apply( + &denseframe.raw_peaks, + &|peak| peak.mz, + &mz_scaling, + &|i_right, i_left| (i_right - i_left) >= min_n, + ); + + // Filter the peaks and replace the raw peaks with the filtered peaks. 
+ + denseframe + .raw_peaks + .clone() + .into_iter() + .zip(keep_vector) + .filter(|(_, b)| *b) + .map(|(peak, _)| peak) // Clone the TimsPeak + .collect::>() + }; + + let converter = DenseFrameConverter { + mz_scaling, + ims_scaling, + }; + let peak_vec: Vec = dbscan_generic( + converter, + prefiltered_peaks, + min_n, + min_intensity, + TimsPeakAggregator::default, + None::<&FFTimsPeak>, + None, + true, + &[max_mz_extension as f32, max_ims_extension], + None::, + ); + + DenseFrame { + raw_peaks: peak_vec, + index: out_index, + rt: out_rt, + frame_type: out_frame_type, + sorted: None, + } +} diff --git a/src/aggregation/dbscan/mod.rs b/src/aggregation/dbscan/mod.rs new file mode 100644 index 0000000..2fd3bb8 --- /dev/null +++ b/src/aggregation/dbscan/mod.rs @@ -0,0 +1,4 @@ +pub mod dbscan; +pub mod denseframe_dbscan; +pub mod runner; +pub mod utils; diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs new file mode 100644 index 0000000..c21889f --- /dev/null +++ b/src/aggregation/dbscan/runner.rs @@ -0,0 +1,408 @@ +use std::process::Output; + +use crate::space::space_generics::NDPointConverter; +use crate::space::space_generics::{HasIntensity, NDPoint, QueriableIndexedPoints}; +use crate::utils; +use crate::utils::within_distance_apply; +use indicatif::ProgressIterator; +use log::{debug, info, trace}; + +use rayon::prelude::*; + +use crate::aggregation::aggregators::{ + aggregate_clusters, ClusterAggregator, ClusterLabel, TimsPeakAggregator, +}; +use crate::space::kdtree::RadiusKDTree; + +use crate::aggregation::dbscan::utils::FilterFunCache; + +struct ClusterLabels { + cluster_labels: Vec>, + num_clusters: u64, +} + +impl ClusterLabels { + fn new(num_labels: usize) -> Self { + let cluster_labels = vec![ClusterLabel::Unassigned; num_labels]; + Self { + cluster_labels, + num_clusters: 0, + } + } + + fn set_cluster(&mut self, index: usize, cluster_id: u64) { + self.cluster_labels[index] = ClusterLabel::Cluster(cluster_id); + } + + fn 
set_new_cluster(&mut self, index: usize) { + self.num_clusters += 1; + self.set_cluster(index, self.num_clusters); + } + + fn set_current_cluster(&mut self, index: usize) { + let cluster_id = self.num_clusters; + self.set_cluster(index, cluster_id); + } + + fn set_noise(&mut self, index: usize) { + self.cluster_labels[index] = ClusterLabel::Noise; + } + + fn get(&self, index: usize) -> ClusterLabel { + self.cluster_labels[index] + } +} + +struct DBScanTimers { + main: utils::ContextTimer, + filter_fun_cache_timer: utils::ContextTimer, + outer_loop_nn_timer: utils::ContextTimer, + inner_loop_nn_timer: utils::ContextTimer, + local_neighbor_filter_timer: utils::ContextTimer, + outer_intensity_calculation: utils::ContextTimer, + inner_intensity_calculation: utils::ContextTimer, +} + +impl DBScanTimers { + fn new() -> Self { + let mut timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); + let mut filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); + let mut outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); + let mut inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); + let mut local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); + let mut outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); + let mut inner_intensity_calculation = timer.start_sub_timer("inner_intensity_calculation"); + Self { + main: timer, + filter_fun_cache_timer, + outer_loop_nn_timer, + inner_loop_nn_timer, + local_neighbor_filter_timer, + outer_intensity_calculation, + inner_intensity_calculation, + } + } + + fn report_if_gt_us(self, min_time: u128) { + if self.main.cumtime.as_micros() > min_time { + self.main.report(); + self.filter_fun_cache_timer.report(); + self.outer_loop_nn_timer.report(); + self.inner_loop_nn_timer.report(); + self.local_neighbor_filter_timer.report(); + self.outer_intensity_calculation.report(); + self.inner_intensity_calculation.report(); + } + } +} + 
+struct CandidateCountMetrics { + initial_candidates_counts: utils::RollingSDCalculator, + final_candidates_counts: utils::RollingSDCalculator, +} + +impl CandidateCountMetrics { + fn new() -> Self { + Self { + initial_candidates_counts: utils::RollingSDCalculator::default(), + final_candidates_counts: utils::RollingSDCalculator::default(), + } + } +} + +struct DBSCANRunnerState<'a> { + cluster_labels: ClusterLabels, + filter_fun_cache: FilterFunCache<'a>, + timers: DBScanTimers, + candidate_metrics: CandidateCountMetrics, +} + +impl DBSCANRunnerState<'_> { + fn new<'a>( + nlabels: usize, + min_n: usize, + usize_filterfun: &dyn Fn(&usize, &usize) -> bool, + ) -> Self { + let mut cluster_labels = ClusterLabels::new(nlabels); + let filter_fun_cache = FilterFunCache::new(Box::new(&usize_filterfun), nlabels); + let timers = DBScanTimers::new(); + let candidate_metrics = CandidateCountMetrics::new(); + + Self { + cluster_labels, + filter_fun_cache, + timers, + candidate_metrics, + } + } + + fn create_progress_bar(&self, len: usize, visible: bool) -> indicatif::ProgressBar { + if visible { + indicatif::ProgressBar::new(len as u64) + } else { + indicatif::ProgressBar::hidden() + } + } +} + +//trait FilterFunction: for<'a, 'b> Fn<(&'a E, &'b E)> + Sized{} + +struct DBSCANRunner<'a, const N: usize, C, E> { + min_n: usize, + min_intensity: u64, + filter_fun: &'a (dyn Fn(&E, &E) -> bool + Send + Sync), + converter: C, + progress: bool, + max_extension_distances: &'a [f32; N], + state: Option>, +} + +// C: NDPointConverter, +// C2: NDPointConverter, +// R: Send, +// G: Sync + Send + ClusterAggregator, +// T: HasIntensity + Send + Clone + Copy + Sync, +// F: Fn() -> G + Send + Sync, +// const N: usize, +// FF: Send + Sync + Fn(&T, &T) -> bool, + +impl<'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> +where + C: NDPointConverter, + E: Sync + HasIntensity, + //T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, +{ + fn run( + &self, + prefiltered_peaks: &'a Vec, + 
intensity_sorted_indices: &'a Vec<(usize, f64)>, + ) -> ClusterLabels { + let usize_filterfun = |a: &usize, b: &usize| { + (self.filter_fun)(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) + }; + self.state = Some(DBSCANRunnerState::new( + intensity_sorted_indices.len(), + self.min_n, + &usize_filterfun, + )); + + let mut state = self.state.expect("State is created in this function."); + // Q: if filter fun is required ... why is it an option? + self.process_points(state, prefiltered_peaks, intensity_sorted_indices); + + state.timers.main.stop(false); + state.timers.report_if_gt_us(1000000); + state.cluster_labels + } + + fn process_points( + &self, + mut state: DBSCANRunnerState<'a>, + prefiltered_peaks: &'a Vec, + intensity_sorted_indices: &'a Vec<(usize, f64)>, + ) { + let my_progbar = state.create_progress_bar(intensity_sorted_indices.len(), self.progress); + + for (point_index, _intensity) in intensity_sorted_indices.iter().progress_with(my_progbar) { + self.process_single_point( + *point_index, + prefiltered_peaks, + &mut state.cluster_labels, + &mut state.filter_fun_cache, + &mut state.timers, + &mut state.candidate_metrics, + ); + } + } + + fn process_single_point( + &self, + point_index: usize, + prefiltered_peaks: &'a Vec, + cluster_labels: &mut ClusterLabels, + filter_fun_cache: &mut FilterFunCache<'a>, + timers: &mut DBScanTimers, + cc_metrics: &mut CandidateCountMetrics, + ) { + if cluster_labels.get(point_index) != ClusterLabel::Unassigned { + return; + } + + let neighbors = self.find_neighbors( + point_index, + prefiltered_peaks, + filter_fun_cache, + timers, + cc_metrics, + ); + if !self.is_core_point(&neighbors, prefiltered_peaks, timers) { + cluster_labels.set_noise(point_index); + return; + } + + self.expand_cluster( + point_index, + neighbors, + prefiltered_peaks, + cluster_labels, + filter_fun_cache, + timers, + ); + } + + fn find_neighbors( + &self, + point_index: usize, + prefiltered_peaks: &'a Vec, + filter_fun_cache: &mut 
FilterFunCache<'a>, + timers: &mut DBScanTimers, + cc_metrics: &mut CandidateCountMetrics, + ) -> Vec { + timers.outer_loop_nn_timer.reset_start(); + let query_elems = self + .converter + .convert_to_bounds_query(&quad_points[point_index]); + let mut candidate_neighbors = self + .indexed_points + .query_ndrange(&query_elems.0, query_elems.1); + timers.outer_loop_nn_timer.stop(false); + + let num_initial_candidates = candidate_neighbors.len(); + candidate_neighbors.retain(|i| filter_fun_cache(**i, point_index)); + + let neighbors = candidate_neighbors; + let candidates_after_filter = neighbors.len(); + cc_metrics + .initial_candidates_counts + .add(num_initial_candidates as f32, 1); + cc_metrics + .final_candidates_counts + .add(candidates_after_filter as f32, 1); + + neighbors + } + + fn is_core_point( + &self, + neighbors: &[usize], + prefiltered_peaks: &'a Vec, + timers: &mut DBScanTimers, + ) -> bool { + timers.outer_intensity_calculation.reset_start(); + let neighbor_intensity_total = neighbors + .iter() + .map(|i| prefiltered_peaks[**i].intensity().as_()) + .sum::(); + timers.outer_intensity_calculation.stop(false); + return neighbor_intensity_total >= self.min_intensity; + } + + fn expand_cluster( + &self, + point_index: usize, + mut neighbors: Vec, + prefiltered_peaks: &'a Vec, + cluster_labels: &mut ClusterLabels, + filter_fun_cache: &mut FilterFunCache<'a>, + timers: &mut DBScanTimers, + ) { + cluster_labels.set_new_cluster(point_index); + + let mut seed_set: Vec = Vec::new(); + seed_set.extend(neighbors); + + while let Some(neighbor) = seed_set.pop() { + let neighbor_index = neighbor; + if cluster_labels.get(neighbor_index) == ClusterLabel::Noise { + cluster_labels.set_current_cluster(neighbor_index); + } + + if cluster_labels.get(neighbor_index) != ClusterLabel::Unassigned { + continue; + } + + cluster_labels.set_current_cluster(neighbor_index); + + timers.inner_loop_nn_timer.reset_start(); + let inner_query_elems = 
converter.convert_to_bounds_query(&quad_points[*neighbor]); + let mut local_neighbors = + indexed_points.query_ndrange(&inner_query_elems.0, inner_query_elems.1); + timers.inner_loop_nn_timer.stop(false); + + local_neighbors.retain(|i| filterfun_with_cache(**i, point_index)); + + timers.inner_intensity_calculation.reset_start(); + let query_intensity = prefiltered_peaks[neighbor_index].intensity(); + let neighbor_intensity_total = local_neighbors + .iter() + .map(|i| prefiltered_peaks[**i].intensity().as_()) + .sum::(); + timers.inner_intensity_calculation.stop(false); + + if local_neighbors.len() >= min_n && neighbor_intensity_total >= min_intensity { + local_neighbors + .retain(|i| !matches!(cluster_labels[**i], ClusterLabel::Cluster(_))); + + timers.local_neighbor_filter_timer.reset_start(); + local_neighbors.retain(|i| { + let going_downhill = prefiltered_peaks[**i].intensity() <= query_intensity; + + let p = &quad_points[**i]; + let query_point = query_elems.1.unwrap(); + let mut within_distance = true; + for ((p, q), max_dist) in p + .values + .iter() + .zip(query_point.values) + .zip(self.max_extension_distances.iter()) + { + let dist = (p - q).abs(); + within_distance = within_distance && dist <= *max_dist; + if !within_distance { + break; + } + } + + going_downhill && within_distance + }); + timers.local_neighbor_filter_timer.stop(false); + + seed_set.extend(local_neighbors); + } + } + } +} + +fn _dbscan<'a, const N: usize, C, I, E, T, FF>( + indexed_points: &'a T, + prefiltered_peaks: &'a Vec, + quad_points: &'a [NDPoint], + min_n: usize, + min_intensity: u64, + intensity_sorted_indices: &'a Vec<(usize, I)>, + filter_fun: Option, + converter: C, + progress: bool, + max_extension_distances: &'a [f32; N], +) -> (u64, Vec>) { + let runner = DBSCANRunner::new( + indexed_points, + quad_points, + min_n, + min_intensity, + filter_fun, + converter, + progress, + max_extension_distances, + ); + + let mut cluster_labels = vec![ClusterLabel::Unassigned; 
prefiltered_peaks.len()]; + + let cluster_id = runner.run( + prefiltered_peaks, + intensity_sorted_indices, + &mut cluster_labels, + ); + + (cluster_id, cluster_labels) +} diff --git a/src/aggregation/dbscan/utils.rs b/src/aggregation/dbscan/utils.rs new file mode 100644 index 0000000..e4808d3 --- /dev/null +++ b/src/aggregation/dbscan/utils.rs @@ -0,0 +1,64 @@ +use std::collections::BTreeMap; + +pub struct FilterFunCache<'a> { + cache: Vec>>, + filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, + tot_queries: u64, + cached_queries: u64, +} + +impl<'a> FilterFunCache<'a> { + pub fn new(filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, capacity: usize) -> Self { + Self { + cache: vec![None; capacity], + filter_fun, + tot_queries: 0, + cached_queries: 0, + } + } + + pub fn get(&mut self, elem_idx: usize, reference_idx: usize) -> bool { + // Get the value if it exists, call the functon, insert it and + // return it if it doesn't. + self.tot_queries += 1; + + let out: bool = match self.cache[elem_idx] { + Some(ref map) => match map.get(&reference_idx) { + Some(x) => { + self.cached_queries += 1; + *x + } + None => { + let out: bool = (self.filter_fun)(&elem_idx, &reference_idx); + self.insert(elem_idx, reference_idx, out); + self.insert(reference_idx, elem_idx, out); + out + } + }, + None => { + let out = (self.filter_fun)(&elem_idx, &reference_idx); + self.insert(elem_idx, reference_idx, out); + self.insert(reference_idx, elem_idx, out); + out + } + }; + out + } + + fn insert(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + match self.cache[elem_idx] { + Some(ref mut map) => { + _ = map.insert(reference_idx, value); + } + None => { + let mut map = BTreeMap::new(); + map.insert(reference_idx, value); + self.cache[elem_idx] = Some(map); + } + } + } + + fn get_stats(&self) -> (u64, u64) { + (self.tot_queries, self.cached_queries) + } +} diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 507e9c7..ea3a6c4 100644 --- 
a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,6 +1,6 @@ use core::panic; -use crate::aggregation::dbscan; +use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; @@ -117,7 +117,7 @@ fn _denoise_denseframe( let index = frame.index; // this is the line that matters - let denoised_frame = dbscan::dbscan_denseframe( + let denoised_frame = dbscan_denseframe( frame, mz_scaling, max_mz_extension, @@ -253,7 +253,7 @@ struct FrameDenoiser { impl<'a> Denoiser<'a, Frame, DenseFrame, Converters, Option> for FrameDenoiser { fn denoise(&self, frame: Frame) -> DenseFrame { - let denseframe = DenseFrame::new(&frame, &self.ims_converter, &self.mz_converter); + let denseframe = DenseFrame::from_frame(&frame, &self.ims_converter, &self.mz_converter); _denoise_denseframe( denseframe, self.min_n, diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 20e7b68..04b07da 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -2,7 +2,7 @@ use crate::aggregation::aggregators::ClusterAggregator; use crate::aggregation::chromatograms::{ BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, }; -use crate::aggregation::dbscan::dbscan_generic; +use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::DenseFrameWindow; use crate::space::space_generics::NDBoundary; use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike}; @@ -119,16 +119,9 @@ pub struct TimeTimsPeak { pub n_peaks: u32, } -impl HasIntensity for TimeTimsPeak { - fn intensity(&self) -> u32 { - let o = self.intensity.try_into(); - match o { - Ok(x) => x, - Err(_) => { - error!("Intensity overflowed u32"); - u32::MAX - } - } +impl HasIntensity for TimeTimsPeak { + fn intensity(&self) -> u64 { + self.intensity } } @@ -179,7 +172,7 @@ impl BaseTrace { } } -impl HasIntensity for 
BaseTrace { +impl HasIntensity for BaseTrace { fn intensity(&self) -> u64 { self.intensity } diff --git a/src/ms/frames.rs b/src/ms/frames.rs index 3018194..7f6a8e0 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -5,6 +5,7 @@ pub use timsrust::{ }; use crate::ms::tdf::{DIAFrameInfo, ScanRange}; +use crate::space::space_generics::HasIntensity; use log::info; @@ -16,6 +17,12 @@ pub struct TimsPeak { pub npeaks: u32, } +impl HasIntensity for TimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + #[derive(Debug, Clone, Copy)] pub struct RawTimsPeak { pub intensity: u32, @@ -50,6 +57,73 @@ pub struct FrameMsMsWindowInfo { pub global_quad_row_id: usize, } +pub trait FramePointTolerance { + fn tof_index_range(&self, tof_index: u32) -> (u32, u32); + fn scan_range(&self, scan_index: usize) -> (usize, usize); +} + +struct AbsoluteFramePointTolerance { + tof_index_tolerance: u32, + scan_tolerance: usize, +} + +impl FramePointTolerance for AbsoluteFramePointTolerance { + fn tof_index_range(&self, tof_index: u32) -> (u32, u32) { + let tof_index_tolerance = self.tof_index_tolerance; + ( + tof_index.saturating_sub(tof_index_tolerance), + tof_index.saturating_add(tof_index_tolerance), + ) + } + + fn scan_range(&self, scan_index: usize) -> (usize, usize) { + let scan_tolerance = self.scan_tolerance; + ( + scan_index.saturating_sub(scan_tolerance), + scan_index + scan_tolerance, + ) + } +} + +type Range = (usize, usize); + +pub struct RangeSet { + ranges: Vec, + offset: usize, +} + +impl RangeSet { + fn extend(&mut self, other: RangeSet) { + let new_offset = self.offset.min(other.offset); + let vs_self_offset = self.offset - new_offset; + let vs_other_offset = other.offset - new_offset; + + for item in self.ranges.iter_mut() { + item.0 += vs_self_offset; + item.1 += vs_self_offset; + } + + for item in other.ranges.iter() { + self.ranges + .push((item.0 + vs_other_offset, item.1 + vs_other_offset)); + } + + self.ranges.sort_unstable_by(|a, b| 
a.0.cmp(&b.0)); + } + + fn any_overlap(&self) -> bool { + let mut last_end = 0; + + for range in self.ranges.iter() { + if range.0 < last_end { + return true; + } + last_end = range.1; + } + false + } +} + #[derive(Debug, Clone)] pub enum MsMsFrameSliceWindowInfo { WindowGroup(usize), @@ -61,10 +135,15 @@ pub enum MsMsFrameSliceWindowInfo { /// 1. every tof-index + intensity represents a peak. /// 2. Scan offsets are monotonically increasing. /// 3. Peaks are arranged in increasing m/z order WITHIN a scan. -/// 4. Getting the peaks for scan #x in the frame is done by subsetting -/// the tof indices and intensities. -/// - scan_1_intensities = intensities[scan_offsets[1]:scan_offsets[2]] -/// - scan_x_intensities = intensities[scan_offsets[x]:scan_offsets[x+1]] +/// 4. Getting the peaks for scan #x in the frame_slice is done by subsetting +/// the tof indices and intensities, and subtracting the offset of the first +/// scan. +/// - scan_1_intensities = intensities[scan_offsets[1]-scan_offsets[0]:scan_offsets[2]-scan_offsets[0]] +/// - scan_x_intensities = intensities[scan_offsets[x]-scan_offsets[0]:scan_offsets[x+1]-scan_offsets[0]] +/// - NOTE: to get the peaks in the scan #y IN THE FRAME (not the frame slice) +/// you need to add to subtract the scan_start from the scan number. +/// - scan_y_intensities = intensities[scan_offsets[y-scan_start]-scan_offsets[0]:scan_offsets[y-scan_start+1]-scan_offsets[0]] +/// - Then obviously, scans < scan_start are not in the frame slice. /// 5. The m/z values are a function of the tof indices (the measured m/z /// of tof index `x` will be the same within the same run/instrument /// calibration) @@ -132,6 +211,149 @@ impl<'a> FrameSlice<'a> { slice_window_info, } } + + /// Get the global scan number at the local index. + /// + /// This means that ... provided the index of a tof index in the frame slice, + /// this function will return the global scan number that tof index would belong + /// to... 
in other words, "what is the scan number in the parent frame where peak + /// number `x` in the frame slice would be found in the parent frame?" + pub fn global_scan_at_index(&self, local_index: usize) -> usize { + let search_val = self.scan_offsets[0] + local_index; + let loc = self + .scan_offsets + .binary_search_by(|x| x.partial_cmp(&search_val).unwrap()); + let local_scan_index = match loc { + Ok(mut x) => { + while x > 0 && self.scan_offsets[x - 1] >= search_val { + x -= 1; + } + x + } + Err(x) => x - 1, + }; + self.scan_start + local_scan_index + } + + pub fn explode_scan_numbers(&self) -> Vec { + let mut scan_numbers = Vec::with_capacity(self.tof_indices.len()); + let curr_scan = self.scan_start; + + for (scan_index, index_offset) in self.scan_offsets[1..].iter().enumerate() { + let num_tofs = index_offset - self.scan_offsets[scan_index]; + scan_numbers.extend(vec![curr_scan + scan_index; num_tofs]); + } + + if cfg!(debug_assertions) { + // Check that all are monotonically increasing with min == scan_start + let mut last_scan = 0; + for scan in scan_numbers.iter() { + debug_assert!(*scan >= last_scan); + last_scan = *scan; + } + + debug_assert!(scan_numbers[0] == self.scan_start); + debug_assert!(scan_numbers.len() == self.tof_indices.len()); + debug_assert_eq!( + scan_numbers.last().unwrap(), + &(self.scan_offsets.len() - 1 + self.scan_start) + ); + } + scan_numbers + } + + pub fn tof_intensities_at_scan(&self, scan_number: usize) -> ((&[u32], &[u32]), usize) { + let scan_index = scan_number - self.scan_start; + let offset_offset = self.scan_offsets[0]; + let scan_start = self.scan_offsets[scan_index] - offset_offset; + let scan_end = self.scan_offsets[scan_index + 1] - offset_offset; + let tof_indices = &self.tof_indices[scan_start..scan_end]; + let intensities = &self.intensities[scan_start..scan_end]; + ((tof_indices, intensities), scan_start) + } + + pub fn matching_range_at_scan( + &self, + tof_index: i32, + scan_number: usize, + tolerance: &T, + ) -> 
Option<(Range, usize)> + where + T: FramePointTolerance, + { + // TODO implement later a two pointer approach for sorted slices of tof indices. + let ((tof_indices, _), start_indptr) = self.tof_intensities_at_scan(scan_number); + let tof_len = tof_indices.len(); + let (start, end) = tolerance.tof_index_range(tof_index as u32); + let tof_index_start = tof_indices.binary_search_by(|x| x.partial_cmp(&start).unwrap()); + let tof_index_end = tof_indices.binary_search_by(|x| x.partial_cmp(&end).unwrap()); + let tof_index_start = match tof_index_start { + Ok(mut x) => { + while x > 0 && tof_indices[x - 1] >= start { + x -= 1; + } + x + } + Err(x) => x, + }; + + if tof_index_start >= tof_len { + return None; + }; + + let tof_index_end = match tof_index_end { + Ok(x) => x, + Err(mut x) => { + while x < tof_len && tof_indices[x] < end { + x += 1; + } + x + } + }; + + if tof_index_end > tof_index_start { + Some(((tof_index_start, tof_index_end), start_indptr)) + } else { + None + } + } + + pub fn matching_rangeset( + &self, + tof_index: i32, + scan_number: usize, + tolerance: &T, + ) -> Option + where + T: FramePointTolerance, + { + let mut ranges = RangeSet { + ranges: Vec::new(), + offset: 0, + }; + + let scan_range = tolerance.scan_range(scan_number); + for scan_number in scan_range.0..scan_range.1 { + if let Some(range_offset) = + self.matching_range_at_scan(tof_index, scan_number, tolerance) + { + ranges.ranges.push(( + range_offset.0 .0 - range_offset.1, + range_offset.0 .1 - range_offset.1, + )); + } + } + + if cfg!(debug_assertions) { + debug_assert!(!ranges.any_overlap()); + } + + if ranges.ranges.len() == 0 { + None + } else { + Some(ranges) + } + } } #[derive(Debug, Clone)] @@ -215,7 +437,7 @@ impl DenseFrameWindow { } impl DenseFrame { - pub fn new( + pub fn from_frame( frame: &Frame, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index e4ca69f..7bf92cb 100644 --- a/src/space/kdtree.rs +++ 
b/src/space/kdtree.rs @@ -1,4 +1,4 @@ -use crate::space::space_generics::{IndexedPoints, NDBoundary, NDPoint}; +use crate::space::space_generics::{NDBoundary, NDPoint, QueriableIndexedPoints}; use log::warn; // Implements a kdtree with several minor differences. @@ -250,7 +250,7 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { } } -impl<'a, T, const D: usize> IndexedPoints<'a, D, T> for RadiusKDTree<'a, T, D> { +impl<'a, T, const D: usize> QueriableIndexedPoints<'a, D, T> for RadiusKDTree<'a, T, D> { fn query_ndpoint(&'a self, point: &NDPoint) -> Vec<&'a T> { self.query(point) } diff --git a/src/space/quad.rs b/src/space/quad.rs index 6a65c62..7e5799d 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -1,4 +1,4 @@ -use crate::space::space_generics::{IndexedPoints, NDBoundary, NDPoint}; +use crate::space::space_generics::{NDBoundary, NDPoint, QueriableIndexedPoints}; use core::panic; use log::trace; @@ -240,7 +240,7 @@ impl<'a, T> RadiusQuadTree<'a, T> { // TODO: rename count_neigh_monotonocally_increasing // because it can do more than just count neighbors.... -impl<'a, T> IndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { +impl<'a, T> QueriableIndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { fn query_ndpoint(&'a self, point: &NDPoint<2>) -> Vec<&'a T> { self.query(point) .into_iter() diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 02e30f5..70d5e0d 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -91,7 +91,7 @@ pub struct NDPoint { } // Q: is there any instance where T is not usize? 
-pub trait IndexedPoints<'a, const N: usize, T> { +pub trait QueriableIndexedPoints<'a, const N: usize, T> { fn query_ndpoint(&'a self, point: &NDPoint) -> Vec<&'a T>; fn query_ndrange( &'a self, @@ -100,17 +100,20 @@ pub trait IndexedPoints<'a, const N: usize, T> { ) -> Vec<&'a T>; } -pub trait HasIntensity -where - T: Copy - + PartialOrd - + std::ops::Add - + std::ops::Sub - + std::ops::Mul - + std::ops::Div - + Default, -{ - fn intensity(&self) -> T; +pub trait AsNDPoints { + fn get_ndpoint(&self, index: usize) -> NDPoint; + fn num_ndpoints(&self) -> usize; + fn intensity_at(&self, index: usize) -> u64; + fn weight_at(&self, index: usize) -> u64 { + self.intensity_at(index) + } +} + +pub trait HasIntensity: Sync { + fn intensity(&self) -> u64; + fn weight(&self) -> u64 { + self.intensity() + } } pub trait TraceLike> { From 0cc1b91d93959bb1f7fa47449ffdbf685669df94 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sun, 7 Jul 2024 12:11:03 -0700 Subject: [PATCH 09/26] (refactor) Complete migration of dbscan to runner struct --- src/aggregation/aggregators.rs | 3 +- src/aggregation/dbscan/dbscan.rs | 294 +------------------ src/aggregation/dbscan/denseframe_dbscan.rs | 3 +- src/aggregation/dbscan/runner.rs | 309 +++++++++++++------- src/aggregation/dbscan/utils.rs | 39 ++- src/aggregation/tracing.rs | 2 +- 6 files changed, 234 insertions(+), 416 deletions(-) diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 7229df0..95cfa99 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,13 +1,12 @@ use crate::ms::frames::TimsPeak; use crate::space::space_generics::HasIntensity; use crate::utils; -use std::ops::Add; use rayon::prelude::*; // I Dont really like having this here but I am not sure where else to // define it ... 
since its needed by the aggregation functions -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, Copy)] pub enum ClusterLabel { Unassigned, Noise, diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index ca11d34..abbedf1 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,293 +1,12 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; -use crate::space::space_generics::{ - HasIntensity, NDPoint, NDPointConverter, QueriableIndexedPoints, -}; +use crate::space::space_generics::{HasIntensity, NDPointConverter, QueriableIndexedPoints}; use crate::utils; -use indicatif::ProgressIterator; use log::{debug, info, trace}; -use num::cast::AsPrimitive; use rayon::prelude::*; use std::ops::Add; -use crate::aggregation::dbscan::utils::FilterFunCache; - -/// Density-based spatial clustering of applications with noise (DBSCAN) -/// -/// This module implements a variant of dbscan with a couple of modifications -/// with respect to the vanilla implementation. -/// -/// Pseudocode from wikipedia. -/// Donate to wikipedia y'all. 
:3 -// -/// DBSCAN(DB, distFunc, eps, minPts) { -/// C := 0 /* Cluster counter */ -/// for each point P in database DB { -/// if label(P) ≠ undefined then continue /* Previously processed in inner loop */ -/// Neighbors N := RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ -/// if |N| < minPts then { /* Density check */ -/// label(P) := Noise /* Label as Noise */ -/// continue -/// } -/// C := C + 1 /* next cluster label */ -/// label(P) := C /* Label initial point */ -/// SeedSet S := N \ {P} /* Neighbors to expand */ -/// for each point Q in S { /* Process every seed point Q */ -/// if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ -/// if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ -/// label(Q) := C /* Label neighbor */ -/// Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ -/// if |N| ≥ minPts then { /* Density check (if Q is a core point) */ -/// S := S ∪ N /* Add new neighbors to seed set */ -/// } -/// } -/// } -/// } -/// Variations ... -/// 1. Indexing is am implementation detail to find the neighbors (generic indexer) -/// 2. Sort the pointd by decreasing intensity (more intense points adopt first). -/// 3. Use an intensity threshold intead of a minimum number of neighbors. -/// 4. There are ways to define the limits to the extension of a cluster. - -// TODO: rename quad_points, since this no longer uses a quadtree. -// TODO: refactor to take a filter function instead of requiting -// a min intensity and an intensity trait. -// TODO: rename the pre-filtered... -// TODO: reimplement this a two-stage pass, where the first in parallel -// gets the neighbors and the second does the iterative aggregation. -// THERE BE DRAGONS in this function ... I am thinking about sane ways to -// refactor it to make it more readable and maintainable. 
- -struct DBScanTimers { - main: utils::ContextTimer, - filter_fun_cache_timer: utils::ContextTimer, - outer_loop_nn_timer: utils::ContextTimer, - inner_loop_nn_timer: utils::ContextTimer, - local_neighbor_filter_timer: utils::ContextTimer, - outer_intensity_calculation: utils::ContextTimer, - inner_intensity_calculation: utils::ContextTimer, -} - -impl DBScanTimers { - fn new() -> Self { - let mut timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); - let mut filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); - let mut outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); - let mut inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); - let mut local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); - let mut outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); - let mut inner_intensity_calculation = timer.start_sub_timer("inner_intensity_calculation"); - Self { - main: timer, - filter_fun_cache_timer, - outer_loop_nn_timer, - inner_loop_nn_timer, - local_neighbor_filter_timer, - outer_intensity_calculation, - inner_intensity_calculation, - } - } - - fn report_if_gt_us(self, min_time: f64) { - if self.timer.cumtime.as_micros() > min_time { - self.main.report(); - self.filter_fun_cache_timer.report(); - self.outer_loop_nn_timer.report(); - self.inner_loop_nn_timer.report(); - self.local_neighbor_filter_timer.report(); - self.outer_intensity_calculation.report(); - self.inner_intensity_calculation.report(); - } - } -} - -// THIS IS A BOTTLENECK FUNCTION -fn _dbscan< - 'a, - const N: usize, - C: NDPointConverter, - E: Sync + HasIntensity, - T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, - FF: Fn(&E, &E) -> bool + Send + Sync + Copy, ->( - indexed_points: &'a T, - prefiltered_peaks: &Vec, - quad_points: &[NDPoint], - min_n: usize, - min_intensity: u64, - intensity_sorted_indices: &Vec<(usize, u64)>, - filter_fun: Option, - converter: C, - 
progress: bool, - max_extension_distances: &[f32; N], -) -> (u64, Vec>) { - let mut initial_candidates_counts = utils::RollingSDCalculator::default(); - let mut final_candidates_counts = utils::RollingSDCalculator::default(); - - let mut cluster_labels = vec![ClusterLabel::Unassigned; prefiltered_peaks.len()]; - let mut cluster_id = 0; - let mut timers = DBScanTimers::new(); - - let usize_filterfun = |a: &usize, b: &usize| { - filter_fun.expect("filter_fun should be Some")( - &prefiltered_peaks[*a], - &prefiltered_peaks[*b], - ) - }; - let mut filterfun_cache = - FilterFunCache::new(Box::new(&usize_filterfun), prefiltered_peaks.len()); - let mut filterfun_with_cache = |elem_idx: usize, reference_idx: usize| { - timers.filter_fun_cache_timer.reset_start(); - let out = filterfun_cache.get(elem_idx, reference_idx); - timers.filter_fun_cache_timer.stop(false); - out - }; - - let my_progbar = if progress { - indicatif::ProgressBar::new(intensity_sorted_indices.len() as u64) - } else { - indicatif::ProgressBar::hidden() - }; - - for (point_index, _intensity) in intensity_sorted_indices.iter().progress_with(my_progbar) { - let point_index = *point_index; - if cluster_labels[point_index] != ClusterLabel::Unassigned { - continue; - } - - timers.outer_loop_nn_timer.reset_start(); - let query_elems = converter.convert_to_bounds_query(&quad_points[point_index]); - let mut neighbors = indexed_points.query_ndrange(&query_elems.0, query_elems.1); - timers.outer_loop_nn_timer.stop(false); - - if neighbors.len() < min_n { - cluster_labels[point_index] = ClusterLabel::Noise; - continue; - } - - if filter_fun.is_some() { - let num_initial_candidates = neighbors.len(); - neighbors.retain(|i| filterfun_with_cache(**i, point_index)); - // .filter(|i| filter_fun.unwrap()(&prefiltered_peaks[**i], &query_peak)) - - let candidates_after_filter = neighbors.len(); - initial_candidates_counts.add(num_initial_candidates as f32, 1); - final_candidates_counts.add(candidates_after_filter as f32, 
1); - - if neighbors.len() < min_n { - cluster_labels[point_index] = ClusterLabel::Noise; - continue; - } - } - - // Q: Do I need to care about overflows here? - Sebastian - timers.outer_intensity_calculation.reset_start(); - let neighbor_intensity_total = neighbors - .iter() - .map(|i| prefiltered_peaks[**i].intensity().as_()) - .sum::(); - timers.outer_intensity_calculation.stop(false); - - if neighbor_intensity_total < min_intensity { - cluster_labels[point_index] = ClusterLabel::Noise; - continue; - } - - cluster_id += 1; - cluster_labels[point_index] = ClusterLabel::Cluster(cluster_id); - let mut seed_set: Vec<&usize> = Vec::new(); - seed_set.extend(neighbors); - - while let Some(neighbor) = seed_set.pop() { - let neighbor_index = *neighbor; - if cluster_labels[neighbor_index] == ClusterLabel::Noise { - cluster_labels[neighbor_index] = ClusterLabel::Cluster(cluster_id); - } - - if cluster_labels[neighbor_index] != ClusterLabel::Unassigned { - continue; - } - - cluster_labels[neighbor_index] = ClusterLabel::Cluster(cluster_id); - - timers.inner_loop_nn_timer.reset_start(); - let inner_query_elems = converter.convert_to_bounds_query(&quad_points[*neighbor]); - let mut local_neighbors = - indexed_points.query_ndrange(&inner_query_elems.0, inner_query_elems.1); - timers.inner_loop_nn_timer.stop(false); - - if filter_fun.is_some() { - local_neighbors.retain(|i| filterfun_with_cache(**i, point_index)) - // .filter(|i| filter_fun.unwrap()(&prefiltered_peaks[**i], &query_peak)) - } - - timers.inner_intensity_calculation.reset_start(); - let query_intensity = prefiltered_peaks[neighbor_index].intensity(); - let neighbor_intensity_total = local_neighbors - .iter() - .map(|i| prefiltered_peaks[**i].intensity().as_()) - .sum::(); - timers.inner_intensity_calculation.stop(false); - - if local_neighbors.len() >= min_n && neighbor_intensity_total >= min_intensity { - // Keep only the neighbors that are not already in a cluster - local_neighbors - .retain(|i| 
!matches!(cluster_labels[**i], ClusterLabel::Cluster(_))); - - // Keep only the neighbors that are within the max extension distance - // It might be worth setting a different max extension distance for the mz and mobility dimensions. - timers.local_neighbor_filter_timer.reset_start(); - local_neighbors.retain(|i| { - let going_downhill = prefiltered_peaks[**i].intensity() <= query_intensity; - - let p = &quad_points[**i]; - let query_point = query_elems.1.unwrap(); - // Using minkowski distance with p = 1, manhattan distance. - let mut within_distance = true; - for ((p, q), max_dist) in p - .values - .iter() - .zip(query_point.values) - .zip(max_extension_distances.iter()) - { - let dist = (p - q).abs(); - within_distance = within_distance && dist <= *max_dist; - if !within_distance { - break; - } - } - - going_downhill && within_distance - }); - timers.local_neighbor_filter_timer.stop(false); - - seed_set.extend(local_neighbors); - } - } - } - - let (tot_queries, cached_queries) = timers.filterfun_cache.get_stats(); - - if tot_queries > 1000 { - let cache_hit_rate = cached_queries as f64 / tot_queries as f64; - info!( - "Cache hit rate: {} / {} = {}", - cached_queries, tot_queries, cache_hit_rate - ); - - let avg_initial_candidates = initial_candidates_counts.get_mean(); - let avg_final_candidates = final_candidates_counts.get_mean(); - debug!( - "Avg initial candidates: {} Avg final candidates: {}", - avg_initial_candidates, avg_final_candidates - ); - } - - timers.main.stop(false); - timers.report_if_gt_us(1000000); - - (cluster_id, cluster_labels) -} +use crate::aggregation::dbscan::runner::_dbscan; // Pretty simple function ... it uses every passed centroid, converts it to a point // and generates a new centroid that aggregates all the points in its range. 
@@ -348,14 +67,13 @@ pub fn dbscan_generic< T: HasIntensity + Send + Clone + Copy + Sync, F: Fn() -> G + Send + Sync, const N: usize, - FF: Send + Sync + Fn(&T, &T) -> bool, >( converter: C, prefiltered_peaks: Vec, min_n: usize, min_intensity: u64, def_aggregator: F, - extra_filter_fun: Option<&FF>, + extra_filter_fun: Option<&(dyn Fn(&T, &T) -> bool + Send + Sync)>, log_level: Option, keep_unclustered: bool, max_extension_distances: &[f32; N], @@ -392,7 +110,7 @@ pub fn dbscan_generic< i_timer.stop(true); let mut i_timer = timer.start_sub_timer("dbscan"); - let (tot_clusters, cluster_labels) = _dbscan( + let cluster_labels = _dbscan( &tree, &prefiltered_peaks, &ndpoints, @@ -407,8 +125,8 @@ pub fn dbscan_generic< i_timer.stop(true); let centroids = aggregate_clusters( - tot_clusters, - cluster_labels, + cluster_labels.num_clusters, + cluster_labels.cluster_labels, &prefiltered_peaks, &def_aggregator, log_level, diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index af0fa7e..501bcc8 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -4,7 +4,6 @@ use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::{DenseFrame, TimsPeak}; use crate::utils::within_distance_apply; -type FFTimsPeak = fn(&TimsPeak, &TimsPeak) -> bool; // bool> pub fn dbscan_denseframe( mut denseframe: DenseFrame, @@ -51,7 +50,7 @@ pub fn dbscan_denseframe( min_n, min_intensity, TimsPeakAggregator::default, - None::<&FFTimsPeak>, + None::<&(dyn Fn(&TimsPeak, &TimsPeak) -> bool + Send + Sync)>, None, true, &[max_mz_extension as f32, max_ims_extension], diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index c21889f..b8beb32 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,24 +1,54 @@ -use std::process::Output; - use crate::space::space_generics::NDPointConverter; use 
crate::space::space_generics::{HasIntensity, NDPoint, QueriableIndexedPoints}; use crate::utils; -use crate::utils::within_distance_apply; use indicatif::ProgressIterator; -use log::{debug, info, trace}; use rayon::prelude::*; -use crate::aggregation::aggregators::{ - aggregate_clusters, ClusterAggregator, ClusterLabel, TimsPeakAggregator, -}; -use crate::space::kdtree::RadiusKDTree; - +use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; -struct ClusterLabels { - cluster_labels: Vec>, - num_clusters: u64, +/// Density-based spatial clustering of applications with noise (DBSCAN) +/// +/// This module implements a variant of dbscan with a couple of modifications +/// with respect to the vanilla implementation. +/// +/// Pseudocode from wikipedia. +/// Donate to wikipedia y'all. :3 +// +/// DBSCAN(DB, distFunc, eps, minPts) { +/// C := 0 /* Cluster counter */ +/// for each point P in database DB { +/// if label(P) ≠ undefined then continue /* Previously processed in inner loop */ +/// Neighbors N := RangeQuery(DB, distFunc, P, eps) /* Find neighbors */ +/// if |N| < minPts then { /* Density check */ +/// label(P) := Noise /* Label as Noise */ +/// continue +/// } +/// C := C + 1 /* next cluster label */ +/// label(P) := C /* Label initial point */ +/// SeedSet S := N \ {P} /* Neighbors to expand */ +/// for each point Q in S { /* Process every seed point Q */ +/// if label(Q) = Noise then label(Q) := C /* Change Noise to border point */ +/// if label(Q) ≠ undefined then continue /* Previously processed (e.g., border point) */ +/// label(Q) := C /* Label neighbor */ +/// Neighbors N := RangeQuery(DB, distFunc, Q, eps) /* Find neighbors */ +/// if |N| ≥ minPts then { /* Density check (if Q is a core point) */ +/// S := S ∪ N /* Add new neighbors to seed set */ +/// } +/// } +/// } +/// } +/// Variations ... +/// 1. Indexing is am implementation detail to find the neighbors (generic indexer) +/// 2. 
Sort the pointd by decreasing intensity (more intense points adopt first). +/// 3. Use an intensity threshold intead of a minimum number of neighbors. +/// 4. There are ways to define the limits to the extension of a cluster. + +#[derive(Debug, Clone)] +pub struct ClusterLabels { + pub cluster_labels: Vec>, + pub num_clusters: u64, } impl ClusterLabels { @@ -65,13 +95,13 @@ struct DBScanTimers { impl DBScanTimers { fn new() -> Self { - let mut timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); - let mut filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); - let mut outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); - let mut inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); - let mut local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); - let mut outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); - let mut inner_intensity_calculation = timer.start_sub_timer("inner_intensity_calculation"); + let timer = utils::ContextTimer::new("internal_dbscan", false, utils::LogLevel::DEBUG); + let filter_fun_cache_timer = timer.start_sub_timer("filter_fun_cache"); + let outer_loop_nn_timer = timer.start_sub_timer("outer_loop_nn"); + let inner_loop_nn_timer = timer.start_sub_timer("inner_loop_nn"); + let local_neighbor_filter_timer = timer.start_sub_timer("local_neighbor_filter"); + let outer_intensity_calculation = timer.start_sub_timer("outer_intensity_calculation"); + let inner_intensity_calculation = timer.start_sub_timer("inner_intensity_calculation"); Self { main: timer, filter_fun_cache_timer, @@ -83,7 +113,7 @@ impl DBScanTimers { } } - fn report_if_gt_us(self, min_time: u128) { + fn report_if_gt_us(&self, min_time: u128) { if self.main.cumtime.as_micros() > min_time { self.main.report(); self.filter_fun_cache_timer.report(); @@ -110,21 +140,25 @@ impl CandidateCountMetrics { } } -struct DBSCANRunnerState<'a> { +struct DBSCANRunnerState { 
cluster_labels: ClusterLabels, - filter_fun_cache: FilterFunCache<'a>, + filter_fun_cache: Option, timers: DBScanTimers, candidate_metrics: CandidateCountMetrics, } -impl DBSCANRunnerState<'_> { - fn new<'a>( - nlabels: usize, - min_n: usize, - usize_filterfun: &dyn Fn(&usize, &usize) -> bool, - ) -> Self { - let mut cluster_labels = ClusterLabels::new(nlabels); - let filter_fun_cache = FilterFunCache::new(Box::new(&usize_filterfun), nlabels); +impl DBSCANRunnerState { + fn new

(nlabels: usize, usize_filterfun: Option

) -> Self + where + P: Fn(&usize, &usize) -> bool + Send + Sync, + { + let cluster_labels = ClusterLabels::new(nlabels); + + let filter_fun_cache = match usize_filterfun { + Some(_) => Some(FilterFunCache::new(nlabels)), + None => None, + }; + //FilterFunCache::new(Box::new(&usize_filterfun), nlabels); let timers = DBScanTimers::new(); let candidate_metrics = CandidateCountMetrics::new(); @@ -150,11 +184,17 @@ impl DBSCANRunnerState<'_> { struct DBSCANRunner<'a, const N: usize, C, E> { min_n: usize, min_intensity: u64, - filter_fun: &'a (dyn Fn(&E, &E) -> bool + Send + Sync), + filter_fun: Option<&'a (dyn Fn(&E, &E) -> bool + Send + Sync)>, converter: C, progress: bool, max_extension_distances: &'a [f32; N], - state: Option>, +} + +struct DBSCANPoints<'a, const N: usize, E> { + prefiltered_peaks: &'a Vec, + intensity_sorted_indices: &'a Vec<(usize, u64)>, + indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), + quad_points: &'a [NDPoint], } // C: NDPointConverter, @@ -166,61 +206,87 @@ struct DBSCANRunner<'a, const N: usize, C, E> { // const N: usize, // FF: Send + Sync + Fn(&T, &T) -> bool, -impl<'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> +impl<'a, 'b: 'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> where C: NDPointConverter, E: Sync + HasIntensity, - //T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, { fn run( &self, - prefiltered_peaks: &'a Vec, - intensity_sorted_indices: &'a Vec<(usize, f64)>, + prefiltered_peaks: &'b Vec, + intensity_sorted_indices: &'b Vec<(usize, u64)>, + indexed_points: &'b (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), + quad_points: &'b [NDPoint], ) -> ClusterLabels { - let usize_filterfun = |a: &usize, b: &usize| { - (self.filter_fun)(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) + let usize_filterfun = match self.filter_fun { + Some(filterfun) => { + let cl = |a: &usize, b: &usize| { + filterfun(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) + }; + let bind = 
Some(cl); + bind + } + None => None, }; - self.state = Some(DBSCANRunnerState::new( - intensity_sorted_indices.len(), - self.min_n, - &usize_filterfun, - )); + // |a: &usize, b: &usize| { + // (self.filter_fun)(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) + // }; + let mut state = DBSCANRunnerState::new(intensity_sorted_indices.len(), usize_filterfun); - let mut state = self.state.expect("State is created in this function."); + let points: DBSCANPoints = DBSCANPoints { + prefiltered_peaks, + intensity_sorted_indices, + indexed_points, + quad_points, + }; // Q: if filter fun is required ... why is it an option? - self.process_points(state, prefiltered_peaks, intensity_sorted_indices); + state = self.process_points(state, &points); + state = self.report_timers(state); + self.take_cluster_labels(state) + } + + fn report_timers(&self, mut state: DBSCANRunnerState) -> DBSCANRunnerState { state.timers.main.stop(false); state.timers.report_if_gt_us(1000000); + state + } + + fn take_cluster_labels(&self, state: DBSCANRunnerState) -> ClusterLabels { state.cluster_labels } fn process_points( &self, - mut state: DBSCANRunnerState<'a>, - prefiltered_peaks: &'a Vec, - intensity_sorted_indices: &'a Vec<(usize, f64)>, - ) { - let my_progbar = state.create_progress_bar(intensity_sorted_indices.len(), self.progress); - - for (point_index, _intensity) in intensity_sorted_indices.iter().progress_with(my_progbar) { + mut state: DBSCANRunnerState, + points: &DBSCANPoints<'a, N, E>, + ) -> DBSCANRunnerState { + let my_progbar = + state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); + + for (point_index, _intensity) in points + .intensity_sorted_indices + .iter() + .progress_with(my_progbar) + { self.process_single_point( *point_index, - prefiltered_peaks, + &points, &mut state.cluster_labels, &mut state.filter_fun_cache, &mut state.timers, &mut state.candidate_metrics, ); } + state } fn process_single_point( &self, point_index: usize, - prefiltered_peaks: 
&'a Vec, + points: &DBSCANPoints<'a, N, E>, cluster_labels: &mut ClusterLabels, - filter_fun_cache: &mut FilterFunCache<'a>, + filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) { @@ -228,22 +294,18 @@ where return; } - let neighbors = self.find_neighbors( - point_index, - prefiltered_peaks, - filter_fun_cache, - timers, - cc_metrics, - ); - if !self.is_core_point(&neighbors, prefiltered_peaks, timers) { + let (neighbors, ref_point) = + self.find_neighbors(point_index, points, filter_fun_cache, timers, cc_metrics); + if !self.is_core_point(&neighbors, points.prefiltered_peaks, timers) { cluster_labels.set_noise(point_index); return; } self.expand_cluster( point_index, + ref_point.unwrap(), neighbors, - prefiltered_peaks, + points, cluster_labels, filter_fun_cache, timers, @@ -253,22 +315,43 @@ where fn find_neighbors( &self, point_index: usize, - prefiltered_peaks: &'a Vec, - filter_fun_cache: &mut FilterFunCache<'a>, + points: &DBSCANPoints<'a, N, E>, + filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, - ) -> Vec { + ) -> (Vec, Option<&NDPoint>) { timers.outer_loop_nn_timer.reset_start(); let query_elems = self .converter - .convert_to_bounds_query(&quad_points[point_index]); - let mut candidate_neighbors = self + .convert_to_bounds_query(&points.quad_points[point_index]); + let mut candidate_neighbors = points .indexed_points - .query_ndrange(&query_elems.0, query_elems.1); + .query_ndrange(&query_elems.0, query_elems.1) + .iter() + .map(|x| **x) + .collect::>(); timers.outer_loop_nn_timer.stop(false); + if filter_fun_cache.is_none() { + return (candidate_neighbors, query_elems.1); + } + let num_initial_candidates = candidate_neighbors.len(); - candidate_neighbors.retain(|i| filter_fun_cache(**i, point_index)); + candidate_neighbors.retain(|i| { + let tmp = filter_fun_cache.as_mut().unwrap(); + let res_in_cache = tmp.get(*i, point_index); + match res_in_cache { + 
Some(res) => res, + None => { + let res = (self.filter_fun.unwrap())( + &points.prefiltered_peaks[*i], + &points.prefiltered_peaks[point_index], + ); + tmp.set(*i, point_index, res); + res + } + } + }); let neighbors = candidate_neighbors; let candidates_after_filter = neighbors.len(); @@ -279,7 +362,7 @@ where .final_candidates_counts .add(candidates_after_filter as f32, 1); - neighbors + (neighbors, query_elems.1) } fn is_core_point( @@ -291,7 +374,7 @@ where timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() - .map(|i| prefiltered_peaks[**i].intensity().as_()) + .map(|i| prefiltered_peaks[*i].intensity()) .sum::(); timers.outer_intensity_calculation.stop(false); return neighbor_intensity_total >= self.min_intensity; @@ -300,10 +383,11 @@ where fn expand_cluster( &self, point_index: usize, - mut neighbors: Vec, - prefiltered_peaks: &'a Vec, + query_point: &NDPoint, + neighbors: Vec, + points: &DBSCANPoints<'a, N, E>, cluster_labels: &mut ClusterLabels, - filter_fun_cache: &mut FilterFunCache<'a>, + filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) { cluster_labels.set_new_cluster(point_index); @@ -324,31 +408,51 @@ where cluster_labels.set_current_cluster(neighbor_index); timers.inner_loop_nn_timer.reset_start(); - let inner_query_elems = converter.convert_to_bounds_query(&quad_points[*neighbor]); - let mut local_neighbors = - indexed_points.query_ndrange(&inner_query_elems.0, inner_query_elems.1); + let inner_query_elems = self + .converter + .convert_to_bounds_query(&points.quad_points[neighbor]); + let mut local_neighbors = points + .indexed_points + .query_ndrange(&inner_query_elems.0, inner_query_elems.1); timers.inner_loop_nn_timer.stop(false); - local_neighbors.retain(|i| filterfun_with_cache(**i, point_index)); + if filter_fun_cache.is_some() { + local_neighbors.retain(|i| { + let cache = filter_fun_cache.as_mut().unwrap(); + let res = cache.get(**i, point_index); + match res { + Some(res) => res, 
+ None => { + let res = (self.filter_fun.unwrap())( + &points.prefiltered_peaks[**i], + &points.prefiltered_peaks[point_index], + ); + cache.set(**i, point_index, res); + res + } + } + }); + } timers.inner_intensity_calculation.reset_start(); - let query_intensity = prefiltered_peaks[neighbor_index].intensity(); + let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); let neighbor_intensity_total = local_neighbors .iter() - .map(|i| prefiltered_peaks[**i].intensity().as_()) + .map(|i| points.prefiltered_peaks[**i].intensity()) .sum::(); timers.inner_intensity_calculation.stop(false); - if local_neighbors.len() >= min_n && neighbor_intensity_total >= min_intensity { + if local_neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity + { local_neighbors - .retain(|i| !matches!(cluster_labels[**i], ClusterLabel::Cluster(_))); + .retain(|i| !matches!(cluster_labels.get(**i), ClusterLabel::Cluster(_))); timers.local_neighbor_filter_timer.reset_start(); local_neighbors.retain(|i| { - let going_downhill = prefiltered_peaks[**i].intensity() <= query_intensity; + let going_downhill = + points.prefiltered_peaks[**i].intensity() <= query_intensity; - let p = &quad_points[**i]; - let query_point = query_elems.1.unwrap(); + let p: &NDPoint = &points.quad_points[**i]; let mut within_distance = true; for ((p, q), max_dist) in p .values @@ -373,36 +477,39 @@ where } } -fn _dbscan<'a, const N: usize, C, I, E, T, FF>( +pub fn _dbscan< + 'a, + const N: usize, + C: NDPointConverter, + E: Sync + HasIntensity, + T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, +>( indexed_points: &'a T, prefiltered_peaks: &'a Vec, quad_points: &'a [NDPoint], min_n: usize, min_intensity: u64, - intensity_sorted_indices: &'a Vec<(usize, I)>, - filter_fun: Option, + intensity_sorted_indices: &'a Vec<(usize, u64)>, + filter_fun: Option<&'a (dyn Fn(&E, &E) -> bool + Send + Sync)>, converter: C, progress: bool, max_extension_distances: &'a [f32; N], -) 
-> (u64, Vec>) { - let runner = DBSCANRunner::new( - indexed_points, - quad_points, +) -> ClusterLabels { + let runner = DBSCANRunner { min_n, min_intensity, - filter_fun, converter, progress, + filter_fun: filter_fun, max_extension_distances, - ); - - let mut cluster_labels = vec![ClusterLabel::Unassigned; prefiltered_peaks.len()]; + }; - let cluster_id = runner.run( + let cluster_labels = runner.run( prefiltered_peaks, intensity_sorted_indices, - &mut cluster_labels, + indexed_points, + quad_points, ); - (cluster_id, cluster_labels) + cluster_labels } diff --git a/src/aggregation/dbscan/utils.rs b/src/aggregation/dbscan/utils.rs index e4808d3..5886d35 100644 --- a/src/aggregation/dbscan/utils.rs +++ b/src/aggregation/dbscan/utils.rs @@ -1,50 +1,45 @@ use std::collections::BTreeMap; -pub struct FilterFunCache<'a> { +pub struct FilterFunCache { cache: Vec>>, - filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, tot_queries: u64, cached_queries: u64, } -impl<'a> FilterFunCache<'a> { - pub fn new(filter_fun: Box<&'a dyn Fn(&usize, &usize) -> bool>, capacity: usize) -> Self { +impl FilterFunCache { + pub fn new(capacity: usize) -> Self { Self { cache: vec![None; capacity], - filter_fun, tot_queries: 0, cached_queries: 0, } } - pub fn get(&mut self, elem_idx: usize, reference_idx: usize) -> bool { - // Get the value if it exists, call the functon, insert it and - // return it if it doesn't. 
+ pub fn get(&mut self, elem_idx: usize, reference_idx: usize) -> Option { self.tot_queries += 1; - let out: bool = match self.cache[elem_idx] { + let out: Option = match self.cache[elem_idx] { Some(ref map) => match map.get(&reference_idx) { Some(x) => { self.cached_queries += 1; - *x - } - None => { - let out: bool = (self.filter_fun)(&elem_idx, &reference_idx); - self.insert(elem_idx, reference_idx, out); - self.insert(reference_idx, elem_idx, out); - out + Some(*x) } + None => None, }, - None => { - let out = (self.filter_fun)(&elem_idx, &reference_idx); - self.insert(elem_idx, reference_idx, out); - self.insert(reference_idx, elem_idx, out); - out - } + None => None, }; out } + pub fn set(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + self.insert_both_ways(elem_idx, reference_idx, value); + } + + fn insert_both_ways(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + self.insert(elem_idx, reference_idx, value); + self.insert(reference_idx, elem_idx, value); + } + fn insert(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { match self.cache[elem_idx] { Some(ref mut map) => { diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 04b07da..ae7d23c 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -457,7 +457,7 @@ fn _combine_single_window_traces( quad_low_high: window_quad_low_high, btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), }, - None::<&FFTimeTimsPeak>, + None::<&(dyn Fn(&TimeTimsPeak, &TimeTimsPeak) -> bool + Send + Sync)>, None, false, &max_extension_distances, From 3106cce336403376fc56789a78fc54669c0ea6fa Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 7 Jul 2024 17:05:53 -0700 Subject: [PATCH 10/26] (chore) change in fmt config --- rustfmt.toml | 12 +++++++ src/aggregation/aggregators.rs | 30 ++++++++++++------ src/aggregation/converters.rs | 10 ++++-- src/aggregation/dbscan/utils.rs | 33 +++++++++++++++----- src/aggregation/ms_denoise.rs | 25 ++++++++++++--- src/aggregation/tracing.rs | 55 ++++++++++++++++++++++++++------- src/main.rs | 12 +++---- src/ms/frames.rs | 53 +++++++++++++++++++++---------- src/ms/sorting.rs | 10 ++++-- src/ms/tdf.rs | 34 +++++++++++--------- src/scoring.rs | 19 +++++++++--- src/space/kdtree.rs | 21 ++++++++++--- src/space/quad.rs | 28 ++++++++++++++--- src/space/space_generics.rs | 50 ++++++++++++++++++++++++------ src/utils.rs | 32 +++++++++++++++---- tests/test_window_parsing.rs | 8 ++--- 16 files changed, 326 insertions(+), 106 deletions(-) create mode 100644 rustfmt.toml diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..d966bd7 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,12 @@ + +fn_params_layout = "Vertical" +match_block_trailing_comma = true +newline_style = "Unix" + +## Unstable features :( +# group_imports = "StdExternalCrate" +# imports_granularity = "Module" +# imports_layout = "Vertical" +# merge_imports = true +# format_strings = true +# struct_lit_single_line = false diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 95cfa99..29b4d55 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -22,9 +22,15 @@ pub enum ClusterLabel { /// S: The type of the aggregator. 
/// pub trait ClusterAggregator: Send + Sync { - fn add(&mut self, elem: &T); + fn add( + &mut self, + elem: &T, + ); fn aggregate(&self) -> R; - fn combine(self, other: Self) -> Self; + fn combine( + self, + other: Self, + ) -> Self; } #[derive(Default, Debug)] @@ -36,7 +42,10 @@ pub struct TimsPeakAggregator { } impl ClusterAggregator for TimsPeakAggregator { - fn add(&mut self, elem: &TimsPeak) { + fn add( + &mut self, + elem: &TimsPeak, + ) { let f64_intensity = elem.intensity as f64; debug_assert!((elem.intensity as u64) < (u64::MAX - self.cluster_intensity)); self.cluster_intensity += elem.intensity as u64; @@ -56,7 +65,10 @@ impl ClusterAggregator for TimsPeakAggregator { } } - fn combine(self, other: Self) -> Self { + fn combine( + self, + other: Self, + ) -> Self { Self { cluster_intensity: self.cluster_intensity + other.cluster_intensity, cluster_mz: self.cluster_mz + other.cluster_mz, @@ -90,7 +102,7 @@ pub fn aggregate_clusters< let cluster_idx = *cluster_id as usize - 1; let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); tmp - } + }, _ => None, }) .collect(); @@ -113,7 +125,7 @@ pub fn aggregate_clusters< (Some(l), Some(r)) => { let o = l.combine(r); Some(o) - } + }, (Some(l), None) => Some(l), (None, Some(r)) => Some(r), (None, None) => None, @@ -165,15 +177,15 @@ pub fn aggregate_clusters< ClusterLabel::Cluster(cluster_id) => { let cluster_idx = *cluster_id as usize - 1; cluster_vecs[cluster_idx].add(&(elements[point_index])); - } + }, ClusterLabel::Noise => { if keep_unclustered { let mut oe = def_aggregator(); oe.add(&elements[point_index]); unclustered_points.push(oe); } - } - _ => {} + }, + _ => {}, } } cluster_vecs.extend(unclustered_points); diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs index 28ed4ce..d1a5c41 100644 --- a/src/aggregation/converters.rs +++ b/src/aggregation/converters.rs @@ -7,7 +7,10 @@ use crate::space::space_generics::NDPointConverter; pub struct 
BypassDenseFrameBackConverter {} impl NDPointConverter for BypassDenseFrameBackConverter { - fn convert(&self, _elem: &TimsPeak) -> NDPoint<2> { + fn convert( + &self, + _elem: &TimsPeak, + ) -> NDPoint<2> { panic!("This should never be called") } } @@ -18,7 +21,10 @@ pub struct DenseFrameConverter { } impl NDPointConverter for DenseFrameConverter { - fn convert(&self, elem: &TimsPeak) -> NDPoint<2> { + fn convert( + &self, + elem: &TimsPeak, + ) -> NDPoint<2> { NDPoint { values: [ (elem.mz / self.mz_scaling) as f32, diff --git a/src/aggregation/dbscan/utils.rs b/src/aggregation/dbscan/utils.rs index 5886d35..d28232f 100644 --- a/src/aggregation/dbscan/utils.rs +++ b/src/aggregation/dbscan/utils.rs @@ -15,7 +15,11 @@ impl FilterFunCache { } } - pub fn get(&mut self, elem_idx: usize, reference_idx: usize) -> Option { + pub fn get( + &mut self, + elem_idx: usize, + reference_idx: usize, + ) -> Option { self.tot_queries += 1; let out: Option = match self.cache[elem_idx] { @@ -23,7 +27,7 @@ impl FilterFunCache { Some(x) => { self.cached_queries += 1; Some(*x) - } + }, None => None, }, None => None, @@ -31,25 +35,40 @@ impl FilterFunCache { out } - pub fn set(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + pub fn set( + &mut self, + elem_idx: usize, + reference_idx: usize, + value: bool, + ) { self.insert_both_ways(elem_idx, reference_idx, value); } - fn insert_both_ways(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + fn insert_both_ways( + &mut self, + elem_idx: usize, + reference_idx: usize, + value: bool, + ) { self.insert(elem_idx, reference_idx, value); self.insert(reference_idx, elem_idx, value); } - fn insert(&mut self, elem_idx: usize, reference_idx: usize, value: bool) { + fn insert( + &mut self, + elem_idx: usize, + reference_idx: usize, + value: bool, + ) { match self.cache[elem_idx] { Some(ref mut map) => { _ = map.insert(reference_idx, value); - } + }, None => { let mut map = BTreeMap::new(); map.insert(reference_idx, 
value); self.cache[elem_idx] = Some(map); - } + }, } } diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index ea3a6c4..fc337e5 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -220,8 +220,14 @@ where Z: Clone, Vec: IntoParallelIterator, { - fn denoise(&self, elem: T) -> W; - fn par_denoise_slice(&self, elems: Vec) -> Vec + fn denoise( + &self, + elem: T, + ) -> W; + fn par_denoise_slice( + &self, + elems: Vec, + ) -> Vec where Self: Sync, { @@ -252,7 +258,10 @@ struct FrameDenoiser { } impl<'a> Denoiser<'a, Frame, DenseFrame, Converters, Option> for FrameDenoiser { - fn denoise(&self, frame: Frame) -> DenseFrame { + fn denoise( + &self, + frame: Frame, + ) -> DenseFrame { let denseframe = DenseFrame::from_frame(&frame, &self.ims_converter, &self.mz_converter); _denoise_denseframe( denseframe, @@ -285,7 +294,10 @@ struct DIAFrameDenoiser { impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> for DIAFrameDenoiser { - fn denoise(&self, _frame: Frame) -> Vec { + fn denoise( + &self, + _frame: Frame, + ) -> Vec { panic!("This should not be called") // _denoise_dia_frame( // frame, @@ -300,7 +312,10 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> // self.max_ims_extension, // ) } - fn par_denoise_slice(&self, elems: Vec) -> Vec> + fn par_denoise_slice( + &self, + elems: Vec, + ) -> Vec> where Self: Sync, { diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index ae7d23c..597f36e 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -71,7 +71,10 @@ pub struct BaseTrace { } impl Serialize for BaseTrace { - fn serialize(&self, serializer: S) -> Result + fn serialize( + &self, + serializer: S, + ) -> Result where S: Serializer, { @@ -125,7 +128,10 @@ impl HasIntensity for TimeTimsPeak { } } -pub fn iou(a: &(f32, f32), b: &(f32, f32)) -> f32 { +pub fn iou( + a: &(f32, f32), + b: &(f32, f32), +) -> f32 { let min_ends = a.1.min(b.1); let max_starts = a.0.max(b.0); @@ 
-162,7 +168,10 @@ mod tests { } impl BaseTrace { - pub fn rt_iou(&self, other: &BaseTrace) -> f32 { + pub fn rt_iou( + &self, + other: &BaseTrace, + ) -> f32 { // TODO change this to be the measured peak width ... let width_a = self.rt_std.max(0.7); let width_b: f32 = other.rt_std.max(0.7); @@ -282,7 +291,10 @@ struct TraceAggregator { } impl ClusterAggregator for TraceAggregator { - fn add(&mut self, peak: &TimeTimsPeak) { + fn add( + &mut self, + peak: &TimeTimsPeak, + ) { let _f64_intensity = peak.intensity as f64; self.mz.add(peak.mz, peak.intensity); debug_assert!(peak.intensity < u64::MAX - self.intensity); @@ -336,7 +348,10 @@ impl ClusterAggregator for TraceAggregator { } } - fn combine(self, other: Self) -> Self { + fn combine( + self, + other: Self, + ) -> Self { let mut mz = self.mz; let mut rt = self.rt; let mut ims = self.ims; @@ -369,7 +384,10 @@ struct TimeTimsPeakConverter { } impl NDPointConverter for TimeTimsPeakConverter { - fn convert(&self, elem: &TimeTimsPeak) -> NDPoint<3> { + fn convert( + &self, + elem: &TimeTimsPeak, + ) -> NDPoint<3> { NDPoint { values: [ (elem.mz / self.mz_scaling) as f32, @@ -383,7 +401,10 @@ impl NDPointConverter for TimeTimsPeakConverter { struct BypassBaseTraceBackConverter {} impl NDPointConverter for BypassBaseTraceBackConverter { - fn convert(&self, _elem: &BaseTrace) -> NDPoint<3> { + fn convert( + &self, + _elem: &BaseTrace, + ) -> NDPoint<3> { panic!("This should never be called"); } } @@ -515,7 +536,10 @@ impl Default for PseudoSpectrumAggregator { } impl<'a> ClusterAggregator for PseudoSpectrumAggregator { - fn add(&mut self, peak: &BaseTrace) { + fn add( + &mut self, + peak: &BaseTrace, + ) { debug_assert!(peak.intensity < u64::MAX - self.intensity); self.rt.add(peak.rt as f64, peak.intensity); @@ -550,7 +574,10 @@ impl<'a> ClusterAggregator for PseudoSpectrumAggregat } } - fn combine(self, other: Self) -> Self { + fn combine( + self, + other: Self, + ) -> Self { let mut peaks = self.peaks.clone(); 
peaks.extend(other.peaks.clone()); let mut rt = self.rt; @@ -581,7 +608,10 @@ struct BaseTraceConverter { } impl NDPointConverter for BaseTraceConverter { - fn convert(&self, elem: &BaseTrace) -> NDPoint<3> { + fn convert( + &self, + elem: &BaseTrace, + ) -> NDPoint<3> { // let rt_start_use = (elem.rt - elem.rt_std).min(elem.rt - self.peak_width_prior as f32); // let rt_end_use = (elem.rt + elem.rt_std).max(elem.rt + self.peak_width_prior as f32); // let rt_start_end_scaling = self.rt_scaling * self.rt_start_end_ratio; @@ -627,7 +657,10 @@ struct PseudoScanBackConverter { } impl NDPointConverter for PseudoScanBackConverter { - fn convert(&self, elem: &PseudoSpectrum) -> NDPoint<3> { + fn convert( + &self, + elem: &PseudoSpectrum, + ) -> NDPoint<3> { let quad_mid = (elem.quad_low + elem.quad_high) / 2.; NDPoint { values: [ diff --git a/src/main.rs b/src/main.rs index 43081d6..8ae427a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -139,10 +139,10 @@ fn main() { None => Ok(()), }; match out { - Ok(_) => {} + Ok(_) => {}, Err(e) => { log::warn!("Error writing traces: {:?}", e); - } + }, } println!("traces: {:?}", traces.len()); @@ -194,10 +194,10 @@ fn main() { }; match out { - Ok(_) => {} + Ok(_) => {}, Err(e) => { log::warn!("Error writing pseudoscans: {:?}", e); - } + }, } println!("pseudoscans: {:?}", pseudoscans.len()); @@ -210,9 +210,9 @@ fn main() { 1, ); match score_out { - Ok(_) => {} + Ok(_) => {}, Err(e) => { log::error!("Error scoring pseudospectra: {:?}", e); - } + }, } } diff --git a/src/ms/frames.rs b/src/ms/frames.rs index 7f6a8e0..b3f93dc 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -58,8 +58,14 @@ pub struct FrameMsMsWindowInfo { } pub trait FramePointTolerance { - fn tof_index_range(&self, tof_index: u32) -> (u32, u32); - fn scan_range(&self, scan_index: usize) -> (usize, usize); + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32); + fn scan_range( + &self, + scan_index: usize, + ) -> (usize, usize); } struct 
AbsoluteFramePointTolerance { @@ -68,7 +74,10 @@ struct AbsoluteFramePointTolerance { } impl FramePointTolerance for AbsoluteFramePointTolerance { - fn tof_index_range(&self, tof_index: u32) -> (u32, u32) { + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32) { let tof_index_tolerance = self.tof_index_tolerance; ( tof_index.saturating_sub(tof_index_tolerance), @@ -76,7 +85,10 @@ impl FramePointTolerance for AbsoluteFramePointTolerance { ) } - fn scan_range(&self, scan_index: usize) -> (usize, usize) { + fn scan_range( + &self, + scan_index: usize, + ) -> (usize, usize) { let scan_tolerance = self.scan_tolerance; ( scan_index.saturating_sub(scan_tolerance), @@ -93,7 +105,10 @@ pub struct RangeSet { } impl RangeSet { - fn extend(&mut self, other: RangeSet) { + fn extend( + &mut self, + other: RangeSet, + ) { let new_offset = self.offset.min(other.offset); let vs_self_offset = self.offset - new_offset; let vs_other_offset = other.offset - new_offset; @@ -218,7 +233,10 @@ impl<'a> FrameSlice<'a> { /// this function will return the global scan number that tof index would belong /// to... in other words, "what is the scan number in the parent frame where peak /// number `x` in the frame slice would be found in the parent frame?" 
- pub fn global_scan_at_index(&self, local_index: usize) -> usize { + pub fn global_scan_at_index( + &self, + local_index: usize, + ) -> usize { let search_val = self.scan_offsets[0] + local_index; let loc = self .scan_offsets @@ -229,7 +247,7 @@ impl<'a> FrameSlice<'a> { x -= 1; } x - } + }, Err(x) => x - 1, }; self.scan_start + local_scan_index @@ -262,7 +280,10 @@ impl<'a> FrameSlice<'a> { scan_numbers } - pub fn tof_intensities_at_scan(&self, scan_number: usize) -> ((&[u32], &[u32]), usize) { + pub fn tof_intensities_at_scan( + &self, + scan_number: usize, + ) -> ((&[u32], &[u32]), usize) { let scan_index = scan_number - self.scan_start; let offset_offset = self.scan_offsets[0]; let scan_start = self.scan_offsets[scan_index] - offset_offset; @@ -293,7 +314,7 @@ impl<'a> FrameSlice<'a> { x -= 1; } x - } + }, Err(x) => x, }; @@ -308,7 +329,7 @@ impl<'a> FrameSlice<'a> { x += 1; } x - } + }, }; if tof_index_end > tof_index_start { @@ -388,18 +409,18 @@ impl DenseFrameWindow { panic!("No window info") // This branch points to an error in logic ... // The window info should always be present in this context. - } + }, Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { // This branch should be easy to implement for things like synchro pasef... // Some details to iron out though ... 
panic!("Not implemented") - } + }, Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { let window_group_id = x.window_group_id; let ww_quad_group_id = x.within_window_quad_group_id; let scan_start = frame_window.scan_start; (window_group_id, ww_quad_group_id, scan_start) - } + }, }; // NOTE: I am swapping here the 'scan start' to be the `ims_end` because @@ -419,7 +440,7 @@ impl DenseFrameWindow { "No scan range for window_group_id: {}, within_window_quad_group_id: {}", window_group_id, ww_quad_group_id ); - } + }, }; let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); @@ -545,7 +566,7 @@ impl DenseFrame { self.raw_peaks .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); self.sorted = Some(SortingOrder::Mz); - } + }, } } @@ -556,7 +577,7 @@ impl DenseFrame { self.raw_peaks .sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); self.sorted = Some(SortingOrder::Mobility); - } + }, } } } diff --git a/src/ms/sorting.rs b/src/ms/sorting.rs index b0c62d1..6d436a9 100644 --- a/src/ms/sorting.rs +++ b/src/ms/sorting.rs @@ -2,7 +2,10 @@ // MIT licensed trait SortExt { fn argsort(&self) -> Vec; - fn sort_by_indices(&mut self, indices: &mut Vec); + fn sort_by_indices( + &mut self, + indices: &mut Vec, + ); } impl SortExt for Vec { @@ -12,7 +15,10 @@ impl SortExt for Vec { indices } - fn sort_by_indices(&mut self, indices: &mut Vec) { + fn sort_by_indices( + &mut self, + indices: &mut Vec, + ) { for idx in 0..self.len() { if indices[idx] != usize::MAX { let mut current_idx = idx; diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 0e2cf68..1cb4a5e 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -116,7 +116,10 @@ pub struct DIAFrameInfo { // of a splitter than a frame info reader. // Maybe a builder -> splitter pattern? 
impl DIAFrameInfo { - pub fn get_dia_frame_window_group(&self, frame_id: usize) -> Option<&DIAWindowGroup> { + pub fn get_dia_frame_window_group( + &self, + frame_id: usize, + ) -> Option<&DIAWindowGroup> { let group_id = self.frame_groups[frame_id]; match group_id { None => None, @@ -208,7 +211,10 @@ impl DIAFrameInfo { Ok(out_frames) } - pub fn split_frame_windows<'a>(&'a self, frames: &'a [Frame]) -> Vec> { + pub fn split_frame_windows<'a>( + &'a self, + frames: &'a [Frame], + ) -> Vec> { let mut out = Vec::new(); match self.grouping_level { @@ -216,12 +222,12 @@ impl DIAFrameInfo { for _ in 0..(self.groups.len() + 1) { out.push(Vec::new()); } - } + }, GroupingLevel::QuadWindowGroup => { for _ in 0..(self.row_to_group.len() + 1) { out.push(Vec::new()); } - } + }, } for frame in frames { @@ -233,7 +239,7 @@ impl DIAFrameInfo { GroupingLevel::WindowGroup => { panic!("WindowGroup grouping level not implemented for splitting frames") //out[group.id].push(frame_window); - } + }, GroupingLevel::QuadWindowGroup => { let frame_windows = self .split_frame(frame, group) @@ -242,16 +248,16 @@ impl DIAFrameInfo { match &frame_window.slice_window_info { None => { panic!("Frame window has no slice window info") - } + }, Some(MsMsFrameSliceWindowInfo::SingleWindow(scan_range)) => { out[scan_range.global_quad_row_id].push(frame_window); - } + }, Some(MsMsFrameSliceWindowInfo::WindowGroup(group)) => { out[*group].push(frame_window); - } + }, } } - } + }, } } @@ -285,7 +291,7 @@ impl DIAFrameInfo { scan_group_id, self.groups.len() ) - } + }, Some(group) => group, }; @@ -296,7 +302,7 @@ impl DIAFrameInfo { "Quad group not found for quad group id: {}, in scan_ranges {:?}", quad_group_id, group.scan_ranges ) - } + }, Some(quad_group) => quad_group, }; @@ -520,16 +526,16 @@ impl FrameInfoBuilder { &self.scan_converter, )); scangroup_id += 1; - } + }, } match grouping_level { GroupingLevel::WindowGroup => { row_to_group.push(usize_wg); - } + }, GroupingLevel::QuadWindowGroup => { 
row_to_group.push(scangroup_id); - } + }, } } Ok((group_map_vec, grouping_level, row_to_group)) diff --git a/src/scoring.rs b/src/scoring.rs index 3312ec2..b4e9bea 100644 --- a/src/scoring.rs +++ b/src/scoring.rs @@ -68,7 +68,10 @@ struct SerializableFeature<'a> { } impl<'a> SerializableFeature<'a> { - fn from_feature(feat: &'a sage_core::scoring::Feature, db: &IndexedDatabase) -> Self { + fn from_feature( + feat: &'a sage_core::scoring::Feature, + db: &IndexedDatabase, + ) -> Self { let peptide = db[feat.peptide_idx].to_string().clone(); SerializableFeature { peptide, @@ -78,7 +81,10 @@ impl<'a> SerializableFeature<'a> { } impl Serialize for SerializableFeature<'_> { - fn serialize(&self, serializer: S) -> Result + fn serialize( + &self, + serializer: S, + ) -> Result where S: Serializer, { @@ -124,7 +130,10 @@ impl Serialize for SerializableFeature<'_> { // -fn pseudospectrum_to_spec(pseudo: PseudoSpectrum, scan_id: String) -> RawSpectrum { +fn pseudospectrum_to_spec( + pseudo: PseudoSpectrum, + scan_id: String, +) -> RawSpectrum { let file_id = 0; let ms_level = 2; @@ -301,7 +310,7 @@ pub fn score_pseudospectra( // Serialize to a csv for debugging match out_path_features { - None => {} + None => {}, Some(out_path_features) => { warn!("Writing features to features.csv ... 
and sebastian should delete this b4 publishing..."); let mut wtr = csv::Writer::from_path(out_path_features)?; @@ -311,7 +320,7 @@ pub fn score_pseudospectra( } wtr.flush()?; drop(wtr); - } + }, } println!("Number of psms at 0.01 FDR: {}", num_q_001); diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index 7bf92cb..9917b36 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -42,7 +42,11 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { } } - pub fn insert_ndpoint(&mut self, point: NDPoint, value: &'a T) { + pub fn insert_ndpoint( + &mut self, + point: NDPoint, + value: &'a T, + ) { if cfg!(debug_assertions) && !self.boundary.contains(&point) { panic!( "Point {:?} is not contained in the boundary of this tree ({:?})", @@ -175,7 +179,10 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { Ok(()) } - pub fn query(&'a self, point: &NDPoint) -> Vec<&'a T> { + pub fn query( + &'a self, + point: &NDPoint, + ) -> Vec<&'a T> { let candidates: Vec<(&NDPoint, &T)> = self.query_range(&NDBoundary::new( point .values @@ -197,7 +204,10 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { out } - pub fn query_range(&'a self, boundary: &NDBoundary) -> Vec<(&NDPoint, &'a T)> { + pub fn query_range( + &'a self, + boundary: &NDBoundary, + ) -> Vec<(&NDPoint, &'a T)> { let mut result = Vec::new(); if !self.boundary.intersects(boundary) { return result; @@ -251,7 +261,10 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { } impl<'a, T, const D: usize> QueriableIndexedPoints<'a, D, T> for RadiusKDTree<'a, T, D> { - fn query_ndpoint(&'a self, point: &NDPoint) -> Vec<&'a T> { + fn query_ndpoint( + &'a self, + point: &NDPoint, + ) -> Vec<&'a T> { self.query(point) } diff --git a/src/space/quad.rs b/src/space/quad.rs index 7e5799d..e07c4b0 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -38,11 +38,19 @@ impl<'a, T> RadiusQuadTree<'a, T> { } } - pub fn insert_ndpoint(&mut self, point: NDPoint<2>, data: &'a T) { + pub fn insert_ndpoint( + &mut self, + 
point: NDPoint<2>, + data: &'a T, + ) { self.insert(point, data); } - pub fn insert(&mut self, point: NDPoint<2>, data: &'a T) { + pub fn insert( + &mut self, + point: NDPoint<2>, + data: &'a T, + ) { if cfg!(debug_assertions) && !self.boundary.contains(&point) { println!( "(Error??) Point outside of boundary {:?} {:?}", @@ -176,7 +184,10 @@ impl<'a, T> RadiusQuadTree<'a, T> { self.points.clear(); } - pub fn query(&'a self, point: &NDPoint<2>) -> Vec<(&'a NDPoint<2>, &'a T)> { + pub fn query( + &'a self, + point: &NDPoint<2>, + ) -> Vec<(&'a NDPoint<2>, &'a T)> { let mut result = Vec::new(); let range = NDBoundary::new( [point.values[0] - self.radius, point.values[1] - self.radius], @@ -208,7 +219,11 @@ impl<'a, T> RadiusQuadTree<'a, T> { } // This function is used a lot so any optimization here will have a big impact. - pub fn query_range(&'a self, range: &NDBoundary<2>, result: &mut Vec<(&'a NDPoint<2>, &'a T)>) { + pub fn query_range( + &'a self, + range: &NDBoundary<2>, + result: &mut Vec<(&'a NDPoint<2>, &'a T)>, + ) { if !self.boundary.intersects(range) || self.count == 0 { return; } @@ -241,7 +256,10 @@ impl<'a, T> RadiusQuadTree<'a, T> { // because it can do more than just count neighbors.... 
impl<'a, T> QueriableIndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { - fn query_ndpoint(&'a self, point: &NDPoint<2>) -> Vec<&'a T> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec<&'a T> { self.query(point) .into_iter() .map(|x| x.1) diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 70d5e0d..dff2853 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -7,7 +7,10 @@ pub struct NDBoundary { } impl NDBoundary { - pub fn new(starts: [f32; D], ends: [f32; D]) -> NDBoundary { + pub fn new( + starts: [f32; D], + ends: [f32; D], + ) -> NDBoundary { let mut widths = [0.0; D]; let mut centers = [0.0; D]; for i in 0..D { @@ -31,7 +34,10 @@ impl NDBoundary { } } - pub fn contains(&self, point: &NDPoint) -> bool { + pub fn contains( + &self, + point: &NDPoint, + ) -> bool { for i in 0..D { // if point.values[i] < self.starts[i] || point.values[i] >= self.ends[i] { if point.values[i] < self.starts[i] || point.values[i] > self.ends[i] { @@ -41,7 +47,10 @@ impl NDBoundary { true } - pub fn intersects(&self, other: &NDBoundary) -> bool { + pub fn intersects( + &self, + other: &NDBoundary, + ) -> bool { for i in 0..D { if self.starts[i] >= other.ends[i] || self.ends[i] <= other.starts[i] { return false; @@ -68,7 +77,10 @@ impl NDBoundary { NDBoundary::new(starts, ends) } - pub fn expand(&mut self, factors: &[f32; D]) { + pub fn expand( + &mut self, + factors: &[f32; D], + ) { for (i, ef) in factors.iter().enumerate() { let mut half_width = self.widths[i] / 2.0; let center = self.centers[i]; @@ -92,7 +104,10 @@ pub struct NDPoint { // Q: is there any instance where T is not usize? 
pub trait QueriableIndexedPoints<'a, const N: usize, T> { - fn query_ndpoint(&'a self, point: &NDPoint) -> Vec<&'a T>; + fn query_ndpoint( + &'a self, + point: &NDPoint, + ) -> Vec<&'a T>; fn query_ndrange( &'a self, boundary: &NDBoundary, @@ -101,10 +116,19 @@ pub trait QueriableIndexedPoints<'a, const N: usize, T> { } pub trait AsNDPoints { - fn get_ndpoint(&self, index: usize) -> NDPoint; + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint; fn num_ndpoints(&self) -> usize; - fn intensity_at(&self, index: usize) -> u64; - fn weight_at(&self, index: usize) -> u64 { + fn intensity_at( + &self, + index: usize, + ) -> u64; + fn weight_at( + &self, + index: usize, + ) -> u64 { self.intensity_at(index) } } @@ -125,8 +149,14 @@ pub trait TraceLike> { } pub trait NDPointConverter { - fn convert(&self, elem: &T) -> NDPoint; - fn convert_vec(&self, elems: &[T]) -> (Vec>, NDBoundary) { + fn convert( + &self, + elem: &T, + ) -> NDPoint; + fn convert_vec( + &self, + elems: &[T], + ) -> (Vec>, NDBoundary) { let points = elems .iter() .map(|elem| self.convert(elem)) diff --git a/src/utils.rs b/src/utils.rs index 33eb566..fb99aa6 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -18,7 +18,11 @@ pub enum LogLevel { } impl ContextTimer { - pub fn new(name: &str, report_start: bool, level: LogLevel) -> ContextTimer { + pub fn new( + name: &str, + report_start: bool, + level: LogLevel, + ) -> ContextTimer { let out = ContextTimer { start: Instant::now(), name: name.to_string(), @@ -44,7 +48,10 @@ impl ContextTimer { } } - pub fn stop(&mut self, report: bool) -> Duration { + pub fn stop( + &mut self, + report: bool, + ) -> Duration { let duration = self.start.elapsed(); self.cumtime += duration; if report { @@ -75,7 +82,10 @@ impl ContextTimer { } } - pub fn start_sub_timer(&self, name: &str) -> ContextTimer { + pub fn start_sub_timer( + &self, + name: &str, + ) -> ContextTimer { ContextTimer::new( &format!("{}::{}", self.name, name), self.report_start, @@ -223,7 +233,11 @@ where 
u64: AsPrimitive, f64: AsPrimitive, { - pub fn add(&mut self, x: T, w: W) { + pub fn add( + &mut self, + x: T, + w: W, + ) { // Check for overflows self.merge(&Self { n: 1, @@ -281,7 +295,10 @@ where self.max } - pub fn merge(&mut self, other: &Self) { + pub fn merge( + &mut self, + other: &Self, + ) { // There is for sure some optimization to be done here. // But right now the math is the hard part ... would definitely pay off let a = *self; @@ -405,7 +422,10 @@ mod test_rolling_sd { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89, ]; - fn assert_close(a: f64, b: f64) { + fn assert_close( + a: f64, + b: f64, + ) { assert!((a - b).abs() < 1e-3, "{} != {}", a, b); } diff --git a/tests/test_window_parsing.rs b/tests/test_window_parsing.rs index 536f43e..583601d 100644 --- a/tests/test_window_parsing.rs +++ b/tests/test_window_parsing.rs @@ -20,10 +20,10 @@ fn test_dia_pasef() { // Make sure the grouping is correctly assigned... for diaPASEF it should // be `QuadWindowGroup` match finfo.grouping_level { - GroupingLevel::QuadWindowGroup => {} + GroupingLevel::QuadWindowGroup => {}, GroupingLevel::WindowGroup => { assert!(false); - } + }, } // Make sure the grouping is correct. @@ -55,8 +55,8 @@ fn test_synchro_dia_pasef() { match finfo.grouping_level { GroupingLevel::QuadWindowGroup => { assert!(false); - } - GroupingLevel::WindowGroup => {} + }, + GroupingLevel::WindowGroup => {}, } // Make sure the grouping is correct. From b7d97b2d56c6ca31439f923f433cbc590c6a8565 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 7 Jul 2024 17:20:17 -0700 Subject: [PATCH 11/26] (wip) refactoring inner loop of dbscan (and formatting) --- src/aggregation/chromatograms.rs | 95 +++++-- src/aggregation/dbscan/runner.rs | 451 ++++++++++++++++++++++++------- 2 files changed, 427 insertions(+), 119 deletions(-) diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index 4e25519..17e976b 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -2,7 +2,7 @@ use log::warn; use num_traits::AsPrimitive; use std::collections::BTreeMap; -use std::ops::{AddAssign, Mul}; +use std::ops::{Add, AddAssign, Mul}; // Needs to be odd pub const NUM_LOCAL_CHROMATOGRAM_BINS: usize = 21; @@ -29,7 +29,10 @@ impl BTreeChromatogram { /// /// The values in bin = 0 will be in the range [bin_offset, bin_offset + binsize) /// - pub fn new(rt_binsize: f32, rt_bin_offset: f32) -> Self { + pub fn new( + rt_binsize: f32, + rt_bin_offset: f32, + ) -> Self { BTreeChromatogram { btree: BTreeMap::new(), rt_binsize, @@ -45,15 +48,25 @@ impl BTreeChromatogram { } } - fn rt_to_bin(&self, rt: f32) -> i32 { + fn rt_to_bin( + &self, + rt: f32, + ) -> i32 { ((rt - self.rt_bin_offset.unwrap()) / self.rt_binsize).floor() as i32 } - fn bin_to_rt(&self, bin: i32) -> f32 { + fn bin_to_rt( + &self, + bin: i32, + ) -> f32 { (bin as f32 * self.rt_binsize) + self.rt_bin_offset.unwrap() } - pub fn add(&mut self, rt: f32, intensity: u64) { + pub fn add( + &mut self, + rt: f32, + intensity: u64, + ) { let add_rt = rt + f32::EPSILON; if self.rt_bin_offset.is_none() { self.rt_bin_offset = Some(rt - (self.rt_binsize / 2.)); @@ -63,11 +76,17 @@ impl BTreeChromatogram { *entry += intensity; } - pub fn get_bin(&self, bin: &i32) -> Option<&u64> { + pub fn get_bin( + &self, + bin: &i32, + ) -> Option<&u64> { self.btree.get(bin) } - pub fn get_at_rt(&self, rt: f32) -> Option<&u64> { + pub fn get_at_rt( + &self, + rt: f32, + ) -> Option<&u64> { let bin = self.rt_to_bin(rt); 
self.btree.get(&bin) } @@ -84,12 +103,15 @@ impl BTreeChromatogram { Some(min) => { let max = *self.btree.keys().last().unwrap(); Some((*min, max)) - } + }, None => None, } } - pub fn adopt(&mut self, other: &Self) { + pub fn adopt( + &mut self, + other: &Self, + ) { if self.rt_bin_offset.is_none() { self.rt_bin_offset = other.rt_bin_offset; } @@ -101,7 +123,10 @@ impl BTreeChromatogram { } } - fn cosine_similarity(&self, other: &Self) -> Option { + fn cosine_similarity( + &self, + other: &Self, + ) -> Option { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { @@ -140,6 +165,10 @@ impl BTreeChromatogram { Some(cosine) } + fn total_intensity(&self) -> u64 { + self.btree.values().sum() + } + pub fn as_chromatogram_array( &self, center_rt: Option, @@ -148,11 +177,6 @@ impl BTreeChromatogram { let max_chr_arr_width = NUM_LOCAL_CHROMATOGRAM_BINS as f32 * self.rt_binsize; let curr_width = self.rt_range().unwrap().1 - self.rt_range().unwrap().0; - if curr_width > max_chr_arr_width * 2. { - warn!("Warning: Chromatogram range is larger than 2x the width of the chromatogram array {} vs {}", curr_width, max_chr_arr_width); - } - // Warn if the range is larger than the 2x width of the chromatogram - // array // The chromatogram uses the bin size of the chromatogram btree // but re-centers it to the mean RT of the trace @@ -171,18 +195,47 @@ impl BTreeChromatogram { } } - ChromatogramArray { + let out = ChromatogramArray { chromatogram: chromatogram_arr, rt_binsize: self.rt_binsize, rt_bin_offset: self.rt_bin_offset, + }; + + // Warn if the range is larger than the 2x width of the chromatogram + // array + if curr_width > max_chr_arr_width * 2. 
{ + warn!( + "Warning: Chromatogram range is larger than 2x the width of the chromatogram array {} vs {} at RT: {}", + curr_width, + max_chr_arr_width, + out.rt_bin_offset.unwrap()); + let arr_intensities = out.total_intensity(); + let btree_intensities = self.total_intensity() as f32; + let ratio = arr_intensities / btree_intensities; + warn!( + "Array intensities: {}, Btree intensities: {}, Ratio: {}", + arr_intensities, btree_intensities, ratio + ); } + + out } } -impl + AddAssign + Default + AsPrimitive, const NBINS: usize> - ChromatogramArray +impl< + T: Mul + + Add + + AddAssign + + Default + + AsPrimitive + + for<'a> std::iter::Sum<&'a T>, + const NBINS: usize, + > ChromatogramArray { - pub fn cosine_similarity(&self, other: &Self) -> Option { + pub fn cosine_similarity( + &self, + other: &Self, + ) -> Option { // Check that the bin size is almost the same let binsize_diff = (self.rt_binsize - other.rt_binsize).abs(); if binsize_diff > 0.01 { @@ -218,6 +271,10 @@ impl + AddAssign + Default + AsPrimitive, const NBINS: u let cosine = dot.as_() / (mag_a * mag_b); Some(cosine) } + + pub fn total_intensity(&self) -> T { + self.chromatogram.iter().sum() + } } #[cfg(test)] diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index b8beb32..9c670f3 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -60,25 +60,41 @@ impl ClusterLabels { } } - fn set_cluster(&mut self, index: usize, cluster_id: u64) { + fn set_cluster( + &mut self, + index: usize, + cluster_id: u64, + ) { self.cluster_labels[index] = ClusterLabel::Cluster(cluster_id); } - fn set_new_cluster(&mut self, index: usize) { + fn set_new_cluster( + &mut self, + index: usize, + ) { self.num_clusters += 1; self.set_cluster(index, self.num_clusters); } - fn set_current_cluster(&mut self, index: usize) { + fn set_current_cluster( + &mut self, + index: usize, + ) { let cluster_id = self.num_clusters; self.set_cluster(index, cluster_id); } - fn 
set_noise(&mut self, index: usize) { + fn set_noise( + &mut self, + index: usize, + ) { self.cluster_labels[index] = ClusterLabel::Noise; } - fn get(&self, index: usize) -> ClusterLabel { + fn get( + &self, + index: usize, + ) -> ClusterLabel { self.cluster_labels[index] } } @@ -113,7 +129,10 @@ impl DBScanTimers { } } - fn report_if_gt_us(&self, min_time: u128) { + fn report_if_gt_us( + &self, + min_time: u128, + ) { if self.main.cumtime.as_micros() > min_time { self.main.report(); self.filter_fun_cache_timer.report(); @@ -148,7 +167,10 @@ struct DBSCANRunnerState { } impl DBSCANRunnerState { - fn new

<P>(nlabels: usize, usize_filterfun: Option<P>) -> Self + fn new<P>( + nlabels: usize, + usize_filterfun: Option<P>

, + ) -> Self where P: Fn(&usize, &usize) -> bool + Send + Sync, { @@ -170,7 +192,11 @@ impl DBSCANRunnerState { } } - fn create_progress_bar(&self, len: usize, visible: bool) -> indicatif::ProgressBar { + fn create_progress_bar( + &self, + len: usize, + visible: bool, + ) -> indicatif::ProgressBar { if visible { indicatif::ProgressBar::new(len as u64) } else { @@ -225,7 +251,7 @@ where }; let bind = Some(cl); bind - } + }, None => None, }; // |a: &usize, b: &usize| { @@ -246,13 +272,19 @@ where self.take_cluster_labels(state) } - fn report_timers(&self, mut state: DBSCANRunnerState) -> DBSCANRunnerState { + fn report_timers( + &self, + mut state: DBSCANRunnerState, + ) -> DBSCANRunnerState { state.timers.main.stop(false); state.timers.report_if_gt_us(1000000); state } - fn take_cluster_labels(&self, state: DBSCANRunnerState) -> ClusterLabels { + fn take_cluster_labels( + &self, + state: DBSCANRunnerState, + ) -> ClusterLabels { state.cluster_labels } @@ -281,6 +313,7 @@ where state } + /// This method gets applied to every point in decreasing intensity order. 
fn process_single_point( &self, point_index: usize, @@ -294,16 +327,20 @@ where return; } - let (neighbors, ref_point) = - self.find_neighbors(point_index, points, filter_fun_cache, timers, cc_metrics); + let neighbors = self.find_main_loop_neighbors( + point_index, + points, + filter_fun_cache, + timers, + cc_metrics, + ); if !self.is_core_point(&neighbors, points.prefiltered_peaks, timers) { cluster_labels.set_noise(point_index); return; } - self.expand_cluster( + self.main_loop_expand_cluster( point_index, - ref_point.unwrap(), neighbors, points, cluster_labels, @@ -312,14 +349,14 @@ where ); } - fn find_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, points: &DBSCANPoints<'a, N, E>, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, - ) -> (Vec, Option<&NDPoint>) { + ) -> Vec { timers.outer_loop_nn_timer.reset_start(); let query_elems = self .converter @@ -333,7 +370,7 @@ where timers.outer_loop_nn_timer.stop(false); if filter_fun_cache.is_none() { - return (candidate_neighbors, query_elems.1); + return candidate_neighbors; } let num_initial_candidates = candidate_neighbors.len(); @@ -349,7 +386,7 @@ where ); tmp.set(*i, point_index, res); res - } + }, } }); @@ -362,7 +399,7 @@ where .final_candidates_counts .add(candidates_after_filter as f32, 1); - (neighbors, query_elems.1) + neighbors } fn is_core_point( @@ -380,101 +417,315 @@ where return neighbor_intensity_total >= self.min_intensity; } - fn expand_cluster( + /// OLD IMPLEMENTATION delete before merging ... 
+ // fn expand_cluster( + // &self, + // point_index: usize, + // query_point: &NDPoint, + // neighbors: Vec, + // points: &DBSCANPoints<'a, N, E>, + // cluster_labels: &mut ClusterLabels, + // filter_fun_cache: &mut Option, + // timers: &mut DBScanTimers, + // ) { + // cluster_labels.set_new_cluster(point_index); + + // let mut seed_set: Vec = Vec::new(); + // seed_set.extend(neighbors); + + // while let Some(neighbor) = seed_set.pop() { + // let neighbor_index = neighbor; + // if cluster_labels.get(neighbor_index) == ClusterLabel::Noise { + // cluster_labels.set_current_cluster(neighbor_index); + // } + + // if cluster_labels.get(neighbor_index) != ClusterLabel::Unassigned { + // continue; + // } + + // cluster_labels.set_current_cluster(neighbor_index); + + // timers.inner_loop_nn_timer.reset_start(); + // let inner_query_elems = self + // .converter + // .convert_to_bounds_query(&points.quad_points[neighbor]); + // let mut local_neighbors = points + // .indexed_points + // .query_ndrange(&inner_query_elems.0, inner_query_elems.1); + // timers.inner_loop_nn_timer.stop(false); + + // if filter_fun_cache.is_some() { + // local_neighbors.retain(|i| { + // let cache = filter_fun_cache.as_mut().unwrap(); + // let res = cache.get(**i, point_index); + // let out = match res { + // Some(res) => res, + // None => { + // let res = (self.filter_fun.unwrap())( + // &points.prefiltered_peaks[**i], + // &points.prefiltered_peaks[point_index], + // ); + // cache.set(**i, point_index, res); + // res + // } + // }; + // out + // }); + // } + + // timers.inner_intensity_calculation.reset_start(); + // let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); + // let neighbor_intensity_total = local_neighbors + // .iter() + // .map(|i| points.prefiltered_peaks[**i].intensity()) + // .sum::(); + // timers.inner_intensity_calculation.stop(false); + + // if local_neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity + // { + // 
local_neighbors + // .retain(|i| !matches!(cluster_labels.get(**i), ClusterLabel::Cluster(_))); + + // timers.local_neighbor_filter_timer.reset_start(); + // local_neighbors.retain(|i| { + // let going_downhill = + // points.prefiltered_peaks[**i].intensity() <= query_intensity; + + // let p: &NDPoint = &points.quad_points[**i]; + // let mut within_distance = true; + // for ((p, q), max_dist) in p + // .values + // .iter() + // .zip(query_point.values) + // .zip(self.max_extension_distances.iter()) + // { + // let dist = (p - q).abs(); + // within_distance = within_distance && dist <= *max_dist; + // if !within_distance { + // break; + // } + // } + + // going_downhill && within_distance + // }); + // timers.local_neighbor_filter_timer.stop(false); + + // seed_set.extend(local_neighbors); + // } + // } + // } + + fn main_loop_expand_cluster( &self, - point_index: usize, - query_point: &NDPoint, + apex_point_index: usize, neighbors: Vec, points: &DBSCANPoints<'a, N, E>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) { - cluster_labels.set_new_cluster(point_index); + cluster_labels.set_new_cluster(apex_point_index); + let mut seed_set: Vec = neighbors; + + while let Some(neighbor_index) = seed_set.pop() { + if !self.process_neighbor(neighbor_index, cluster_labels) { + continue; + } - let mut seed_set: Vec = Vec::new(); - seed_set.extend(neighbors); + let local_neighbors = self.find_local_neighbors(neighbor_index, points, timers); + let filtered_neighbors = self.filter_neighbors_inner_loop( + local_neighbors, + apex_point_index, + neighbor_index, + points, + cluster_labels, + filter_fun_cache, + timers, + ); + + seed_set.extend(filtered_neighbors); + } + } - while let Some(neighbor) = seed_set.pop() { - let neighbor_index = neighbor; - if cluster_labels.get(neighbor_index) == ClusterLabel::Noise { + fn process_neighbor( + &self, + neighbor_index: usize, + cluster_labels: &mut ClusterLabels, + ) -> bool { + match 
cluster_labels.get(neighbor_index) { + ClusterLabel::Noise => { cluster_labels.set_current_cluster(neighbor_index); - } + true + }, + ClusterLabel::Unassigned => { + cluster_labels.set_current_cluster(neighbor_index); + true + }, + ClusterLabel::Cluster(_) => false, + } + } - if cluster_labels.get(neighbor_index) != ClusterLabel::Unassigned { - continue; - } + fn find_local_neighbors( + &self, + neighbor_index: usize, + points: &DBSCANPoints<'a, N, E>, + timers: &mut DBScanTimers, + ) -> Vec { + timers.inner_loop_nn_timer.reset_start(); + let inner_query_elems = self + .converter + .convert_to_bounds_query(&points.quad_points[neighbor_index]); + let local_neighbors: Vec = points + .indexed_points + .query_ndrange(&inner_query_elems.0, inner_query_elems.1) + .iter_mut() + .map(|x| **x) + .collect::>(); + timers.inner_loop_nn_timer.stop(false); + local_neighbors + } - cluster_labels.set_current_cluster(neighbor_index); - - timers.inner_loop_nn_timer.reset_start(); - let inner_query_elems = self - .converter - .convert_to_bounds_query(&points.quad_points[neighbor]); - let mut local_neighbors = points - .indexed_points - .query_ndrange(&inner_query_elems.0, inner_query_elems.1); - timers.inner_loop_nn_timer.stop(false); - - if filter_fun_cache.is_some() { - local_neighbors.retain(|i| { - let cache = filter_fun_cache.as_mut().unwrap(); - let res = cache.get(**i, point_index); - match res { - Some(res) => res, - None => { - let res = (self.filter_fun.unwrap())( - &points.prefiltered_peaks[**i], - &points.prefiltered_peaks[point_index], - ); - cache.set(**i, point_index, res); - res - } - } - }); - } + fn filter_neighbors_inner_loop( + &self, + local_neighbors: Vec, + cluster_apex_point_index: usize, + current_center_point_index: usize, + points: &DBSCANPoints<'a, N, E>, + cluster_labels: &ClusterLabels, + filter_fun_cache: &mut Option, + timers: &mut DBScanTimers, + ) -> Vec { + let filtered = self.apply_filter_fun( + local_neighbors, + cluster_apex_point_index, + 
points, + filter_fun_cache, + ); - timers.inner_intensity_calculation.reset_start(); - let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); - let neighbor_intensity_total = local_neighbors - .iter() - .map(|i| points.prefiltered_peaks[**i].intensity()) - .sum::(); - timers.inner_intensity_calculation.stop(false); - - if local_neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity - { - local_neighbors - .retain(|i| !matches!(cluster_labels.get(**i), ClusterLabel::Cluster(_))); - - timers.local_neighbor_filter_timer.reset_start(); - local_neighbors.retain(|i| { - let going_downhill = - points.prefiltered_peaks[**i].intensity() <= query_intensity; - - let p: &NDPoint = &points.quad_points[**i]; - let mut within_distance = true; - for ((p, q), max_dist) in p - .values - .iter() - .zip(query_point.values) - .zip(self.max_extension_distances.iter()) - { - let dist = (p - q).abs(); - within_distance = within_distance && dist <= *max_dist; - if !within_distance { - break; - } - } - - going_downhill && within_distance - }); - timers.local_neighbor_filter_timer.stop(false); - - seed_set.extend(local_neighbors); - } + if !self.is_extension_core_point(&filtered, current_center_point_index, points, timers) { + return Vec::new(); + } + + let unassigned = self.filter_unassigned(filtered, cluster_labels); + let unassigned_in_global_distance = + self.filter_by_apex_distance(unassigned, cluster_apex_point_index, points, timers); + self.filter_by_local_intensity_and_distance( + unassigned_in_global_distance, + current_center_point_index, + points, + timers, + ) + } + + fn filter_by_apex_distance( + &self, + mut neighbors: Vec, + apex_point_index: usize, + points: &DBSCANPoints<'a, N, E>, + timers: &mut DBScanTimers, + ) -> Vec { + timers.local_neighbor_filter_timer.reset_start(); + let query_point = &points.quad_points[apex_point_index]; + neighbors.retain(|&i| self.is_within_max_distance(&points.quad_points[i], query_point)); + 
timers.local_neighbor_filter_timer.stop(false); + neighbors + } + + fn is_extension_core_point( + &self, + neighbors: &[usize], + current_center_point_index: usize, + points: &DBSCANPoints<'a, N, E>, + timers: &mut DBScanTimers, + ) -> bool { + timers.inner_intensity_calculation.reset_start(); + let mut neighbor_intensity_total: u64 = neighbors + .iter() + .map(|&i| points.prefiltered_peaks[i].intensity()) + .sum(); + + neighbor_intensity_total += + points.prefiltered_peaks[current_center_point_index].intensity(); + timers.inner_intensity_calculation.stop(false); + + neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity + } + + /// This is meant to apply additional filter logic that considers + /// elements that are not only represented by the 'space' of the points + /// or the intensity. + /// + /// Some examples might be if every point represents say ... a chromatogram + /// one could pass a function that checks if the chromatograms have a high correlation. + /// Because two might share the same point in space, intensity is not really + /// relevant but co-elution might be critical. 
+ fn apply_filter_fun( + &self, + local_neighbors: Vec, + point_index: usize, + points: &DBSCANPoints<'a, N, E>, + filter_fun_cache: &mut Option, + ) -> Vec { + if let Some(cache) = filter_fun_cache { + local_neighbors + .into_iter() + .filter(|&i| { + cache.get(i, point_index).unwrap_or_else(|| { + let res = (self.filter_fun.unwrap())( + &points.prefiltered_peaks[i], + &points.prefiltered_peaks[point_index], + ); + cache.set(i, point_index, res); + res + }) + }) + .collect() + } else { + local_neighbors } } + + fn filter_unassigned( + &self, + mut neighbors: Vec, + cluster_labels: &ClusterLabels, + ) -> Vec { + neighbors.retain(|&i| matches!(cluster_labels.get(i), ClusterLabel::Unassigned)); + neighbors + } + + fn filter_by_local_intensity_and_distance( + &self, + mut neighbors: Vec, + neighbor_index: usize, + points: &DBSCANPoints<'a, N, E>, + timers: &mut DBScanTimers, + ) -> Vec { + timers.local_neighbor_filter_timer.reset_start(); + let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); + let query_point = &points.quad_points[neighbor_index]; + + neighbors.retain(|&i| { + let going_downhill = points.prefiltered_peaks[i].intensity() <= query_intensity; + let within_distance = self.is_within_max_distance(&points.quad_points[i], query_point); + going_downhill && within_distance + }); + + timers.local_neighbor_filter_timer.stop(false); + neighbors + } + + fn is_within_max_distance( + &self, + p: &NDPoint, + query_point: &NDPoint, + ) -> bool { + p.values + .iter() + .zip(query_point.values) + .zip(self.max_extension_distances.iter()) + .all(|((p, q), max_dist)| (p - q).abs() <= *max_dist) + } } pub fn _dbscan< From 8be203caabeb500bf43b2dca1059b5f68db8f7e9 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 7 Jul 2024 20:01:21 -0700 Subject: [PATCH 12/26] (feature) Generic implem,entation of intense at and as ndpoints at index --- src/aggregation/aggregators.rs | 237 ++++++++++++++++----------- src/aggregation/dbscan/runner.rs | 267 ++++++++++++------------------- src/space/space_generics.rs | 45 ++++-- 3 files changed, 281 insertions(+), 268 deletions(-) diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 29b4d55..fb44b40 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -92,104 +92,22 @@ pub fn aggregate_clusters< keep_unclustered: bool, ) -> Vec { let cluster_vecs: Vec = if cfg!(feature = "par_dataprep") { - let mut timer = - utils::ContextTimer::new("dbscan_generic::par_aggregation", true, log_level); - let out: Vec<(usize, T)> = cluster_labels - .iter() - .enumerate() - .filter_map(|(point_index, x)| match x { - ClusterLabel::Cluster(cluster_id) => { - let cluster_idx = *cluster_id as usize - 1; - let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); - tmp - }, - _ => None, - }) - .collect(); - - let run_closure = - |chunk: Vec<(usize, T)>| _inner(&chunk, tot_clusters as usize, &def_aggregator); - let chunk_size = (out.len() / rayon::current_num_threads()) / 2; - let chunk_size = chunk_size.max(1); - let out2 = out - .into_par_iter() - .chunks(chunk_size) - .map(run_closure) - .reduce(Vec::new, |l, r| { - if l.is_empty() { - r - } else { - l.into_iter() - .zip(r) - .map(|(l, r)| match (l, r) { - (Some(l), Some(r)) => { - let o = l.combine(r); - Some(o) - }, - (Some(l), None) => Some(l), - (None, Some(r)) => Some(r), - (None, None) => None, - }) - .collect::>() - } - }); - - let mut cluster_vecs = out2.into_iter().flatten().collect::>(); - - let unclustered_elems: Vec = cluster_labels - .iter() - .enumerate() - .filter(|(_, x)| match x { - ClusterLabel::Unassigned => true, - ClusterLabel::Noise => keep_unclustered, - _ => false, - }) - .map(|(i, _elem)| 
i) - .collect(); - - // if unclustered_elems.len() > 0 { - // log::debug!("Total Orig elems: {}", cluster_labels.len()); - // log::debug!("Unclustered elems: {}", unclustered_elems.len()); - // log::debug!("Clustered elems: {}", cluster_vecs.len()); - // } - - let unclustered_elems = unclustered_elems - .iter() - .map(|i| { - let mut oe = def_aggregator(); - oe.add(&elements[*i]); - oe - }) - .collect::>(); - - cluster_vecs.extend(unclustered_elems); - - timer.stop(true); - cluster_vecs + parallel_aggregate_clusters( + tot_clusters, + cluster_labels, + elements, + def_aggregator, + log_level, + keep_unclustered, + ) } else { - let mut cluster_vecs: Vec = Vec::with_capacity(tot_clusters as usize); - let mut unclustered_points: Vec = Vec::new(); - for _ in 0..tot_clusters { - cluster_vecs.push(def_aggregator()); - } - for (point_index, cluster_label) in cluster_labels.iter().enumerate() { - match cluster_label { - ClusterLabel::Cluster(cluster_id) => { - let cluster_idx = *cluster_id as usize - 1; - cluster_vecs[cluster_idx].add(&(elements[point_index])); - }, - ClusterLabel::Noise => { - if keep_unclustered { - let mut oe = def_aggregator(); - oe.add(&elements[point_index]); - unclustered_points.push(oe); - } - }, - _ => {}, - } - } - cluster_vecs.extend(unclustered_points); - cluster_vecs + serial_aggregate_clusters( + tot_clusters, + cluster_labels, + elements, + def_aggregator, + keep_unclustered, + ) }; let mut timer = @@ -203,6 +121,131 @@ pub fn aggregate_clusters< out } +fn parallel_aggregate_clusters< + T: HasIntensity + Send + Clone + Copy, + G: Sync + Send + ClusterAggregator, + R: Send, + F: Fn() -> G + Send + Sync, +>( + tot_clusters: u64, + cluster_labels: Vec>, + elements: &[T], + def_aggregator: &F, + log_level: utils::LogLevel, + keep_unclustered: bool, +) -> Vec { + let mut timer = utils::ContextTimer::new("dbscan_generic::par_aggregation", true, log_level); + let out: Vec<(usize, T)> = cluster_labels + .iter() + .enumerate() + 
.filter_map(|(point_index, x)| match x { + ClusterLabel::Cluster(cluster_id) => { + let cluster_idx = *cluster_id as usize - 1; + let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); + tmp + }, + _ => None, + }) + .collect(); + + let run_closure = + |chunk: Vec<(usize, T)>| _inner(&chunk, tot_clusters as usize, &def_aggregator); + let chunk_size = (out.len() / rayon::current_num_threads()) / 2; + let chunk_size = chunk_size.max(1); + let out2 = out + .into_par_iter() + .chunks(chunk_size) + .map(run_closure) + .reduce(Vec::new, |l, r| { + if l.is_empty() { + r + } else { + l.into_iter() + .zip(r) + .map(|(l, r)| match (l, r) { + (Some(l), Some(r)) => { + let o = l.combine(r); + Some(o) + }, + (Some(l), None) => Some(l), + (None, Some(r)) => Some(r), + (None, None) => None, + }) + .collect::>() + } + }); + + let mut cluster_vecs = out2.into_iter().flatten().collect::>(); + + let unclustered_elems: Vec = cluster_labels + .iter() + .enumerate() + .filter(|(_, x)| match x { + ClusterLabel::Unassigned => true, + ClusterLabel::Noise => keep_unclustered, + _ => false, + }) + .map(|(i, _elem)| i) + .collect(); + + // if unclustered_elems.len() > 0 { + // log::debug!("Total Orig elems: {}", cluster_labels.len()); + // log::debug!("Unclustered elems: {}", unclustered_elems.len()); + // log::debug!("Clustered elems: {}", cluster_vecs.len()); + // } + + let unclustered_elems = unclustered_elems + .iter() + .map(|i| { + let mut oe = def_aggregator(); + oe.add(&elements[*i]); + oe + }) + .collect::>(); + + cluster_vecs.extend(unclustered_elems); + + timer.stop(true); + cluster_vecs +} + +fn serial_aggregate_clusters< + T: HasIntensity + Send + Clone + Copy, + G: Sync + Send + ClusterAggregator, + R: Send, + F: Fn() -> G + Send + Sync, +>( + tot_clusters: u64, + cluster_labels: Vec>, + elements: &[T], + def_aggregator: &F, + keep_unclustered: bool, +) -> Vec { + let mut cluster_vecs: Vec = Vec::with_capacity(tot_clusters as usize); + let mut 
unclustered_points: Vec = Vec::new(); + for _ in 0..tot_clusters { + cluster_vecs.push(def_aggregator()); + } + for (point_index, cluster_label) in cluster_labels.iter().enumerate() { + match cluster_label { + ClusterLabel::Cluster(cluster_id) => { + let cluster_idx = *cluster_id as usize - 1; + cluster_vecs[cluster_idx].add(&(elements[point_index])); + }, + ClusterLabel::Noise => { + if keep_unclustered { + let mut oe = def_aggregator(); + oe.add(&elements[point_index]); + unclustered_points.push(oe); + } + }, + _ => {}, + } + } + cluster_vecs.extend(unclustered_points); + cluster_vecs +} + fn _inner, R>( chunk: &[(usize, T)], max_cluster_id: usize, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 9c670f3..f01f326 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,9 +1,12 @@ use crate::space::space_generics::NDPointConverter; -use crate::space::space_generics::{HasIntensity, NDPoint, QueriableIndexedPoints}; +use crate::space::space_generics::{ + AsNDPointsAtIndex, HasIntensity, IntenseAtIndex, NDPoint, QueriableIndexedPoints, +}; use crate::utils; use indicatif::ProgressIterator; use rayon::prelude::*; +use serde_json::value::Index; use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; @@ -205,8 +208,6 @@ impl DBSCANRunnerState { } } -//trait FilterFunction: for<'a, 'b> Fn<(&'a E, &'b E)> + Sized{} - struct DBSCANRunner<'a, const N: usize, C, E> { min_n: usize, min_intensity: u64, @@ -216,34 +217,34 @@ struct DBSCANRunner<'a, const N: usize, C, E> { max_extension_distances: &'a [f32; N], } -struct DBSCANPoints<'a, const N: usize, E> { - prefiltered_peaks: &'a Vec, +struct DBSCANPoints<'a, const N: usize, E, PP, QP> +where + E: HasIntensity, + PP: IntenseAtIndex + std::ops::Index + std::marker::Send + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, +{ + prefiltered_peaks: &'a PP, // &'a Vec, intensity_sorted_indices: &'a Vec<(usize, u64)>, 
indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), - quad_points: &'a [NDPoint], + quad_points: &'a QP, // [NDPoint], } -// C: NDPointConverter, -// C2: NDPointConverter, -// R: Send, -// G: Sync + Send + ClusterAggregator, -// T: HasIntensity + Send + Clone + Copy + Sync, -// F: Fn() -> G + Send + Sync, -// const N: usize, -// FF: Send + Sync + Fn(&T, &T) -> bool, - impl<'a, 'b: 'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> where C: NDPointConverter, E: Sync + HasIntensity, { - fn run( + fn run( &self, - prefiltered_peaks: &'b Vec, + prefiltered_peaks: &'b PP, // Vec, // trait impl Index intensity_sorted_indices: &'b Vec<(usize, u64)>, indexed_points: &'b (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), - quad_points: &'b [NDPoint], - ) -> ClusterLabels { + quad_points: &'b QP, //[NDPoint], // trait impl AsNDPointAtIndex> + ) -> ClusterLabels + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { let usize_filterfun = match self.filter_fun { Some(filterfun) => { let cl = |a: &usize, b: &usize| { @@ -254,12 +255,10 @@ where }, None => None, }; - // |a: &usize, b: &usize| { - // (self.filter_fun)(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) - // }; + let mut state = DBSCANRunnerState::new(intensity_sorted_indices.len(), usize_filterfun); - let points: DBSCANPoints = DBSCANPoints { + let points: DBSCANPoints = DBSCANPoints { prefiltered_peaks, intensity_sorted_indices, indexed_points, @@ -288,11 +287,15 @@ where state.cluster_labels } - fn process_points( + fn process_points( &self, mut state: DBSCANRunnerState, - points: &DBSCANPoints<'a, N, E>, - ) -> DBSCANRunnerState { + points: &DBSCANPoints<'a, N, E, PP, QP>, + ) -> DBSCANRunnerState + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { let my_progbar = state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); @@ -314,15 +317,18 @@ where } /// This method gets applied to every point in 
decreasing intensity order. - fn process_single_point( + fn process_single_point( &self, point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, - ) { + ) where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { return; } @@ -349,18 +355,22 @@ where ); } - fn find_main_loop_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { timers.outer_loop_nn_timer.reset_start(); let query_elems = self .converter - .convert_to_bounds_query(&points.quad_points[point_index]); + .convert_to_bounds_query(&points.quad_points.get_ndpoint(point_index)); let mut candidate_neighbors = points .indexed_points .query_ndrange(&query_elems.0, query_elems.1) @@ -402,12 +412,15 @@ where neighbors } - fn is_core_point( + fn is_core_point( &self, neighbors: &[usize], - prefiltered_peaks: &'a Vec, + prefiltered_peaks: &'a PP, timers: &mut DBScanTimers, - ) -> bool { + ) -> bool + where + PP: IntenseAtIndex + ?Sized, + { timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() @@ -417,113 +430,18 @@ where return neighbor_intensity_total >= self.min_intensity; } - /// OLD IMPLEMENTATION delete before merging ... 
- // fn expand_cluster( - // &self, - // point_index: usize, - // query_point: &NDPoint, - // neighbors: Vec, - // points: &DBSCANPoints<'a, N, E>, - // cluster_labels: &mut ClusterLabels, - // filter_fun_cache: &mut Option, - // timers: &mut DBScanTimers, - // ) { - // cluster_labels.set_new_cluster(point_index); - - // let mut seed_set: Vec = Vec::new(); - // seed_set.extend(neighbors); - - // while let Some(neighbor) = seed_set.pop() { - // let neighbor_index = neighbor; - // if cluster_labels.get(neighbor_index) == ClusterLabel::Noise { - // cluster_labels.set_current_cluster(neighbor_index); - // } - - // if cluster_labels.get(neighbor_index) != ClusterLabel::Unassigned { - // continue; - // } - - // cluster_labels.set_current_cluster(neighbor_index); - - // timers.inner_loop_nn_timer.reset_start(); - // let inner_query_elems = self - // .converter - // .convert_to_bounds_query(&points.quad_points[neighbor]); - // let mut local_neighbors = points - // .indexed_points - // .query_ndrange(&inner_query_elems.0, inner_query_elems.1); - // timers.inner_loop_nn_timer.stop(false); - - // if filter_fun_cache.is_some() { - // local_neighbors.retain(|i| { - // let cache = filter_fun_cache.as_mut().unwrap(); - // let res = cache.get(**i, point_index); - // let out = match res { - // Some(res) => res, - // None => { - // let res = (self.filter_fun.unwrap())( - // &points.prefiltered_peaks[**i], - // &points.prefiltered_peaks[point_index], - // ); - // cache.set(**i, point_index, res); - // res - // } - // }; - // out - // }); - // } - - // timers.inner_intensity_calculation.reset_start(); - // let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); - // let neighbor_intensity_total = local_neighbors - // .iter() - // .map(|i| points.prefiltered_peaks[**i].intensity()) - // .sum::(); - // timers.inner_intensity_calculation.stop(false); - - // if local_neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity - // { - // 
local_neighbors - // .retain(|i| !matches!(cluster_labels.get(**i), ClusterLabel::Cluster(_))); - - // timers.local_neighbor_filter_timer.reset_start(); - // local_neighbors.retain(|i| { - // let going_downhill = - // points.prefiltered_peaks[**i].intensity() <= query_intensity; - - // let p: &NDPoint = &points.quad_points[**i]; - // let mut within_distance = true; - // for ((p, q), max_dist) in p - // .values - // .iter() - // .zip(query_point.values) - // .zip(self.max_extension_distances.iter()) - // { - // let dist = (p - q).abs(); - // within_distance = within_distance && dist <= *max_dist; - // if !within_distance { - // break; - // } - // } - - // going_downhill && within_distance - // }); - // timers.local_neighbor_filter_timer.stop(false); - - // seed_set.extend(local_neighbors); - // } - // } - // } - - fn main_loop_expand_cluster( + fn main_loop_expand_cluster( &self, apex_point_index: usize, neighbors: Vec, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, - ) { + ) where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { cluster_labels.set_new_cluster(apex_point_index); let mut seed_set: Vec = neighbors; @@ -565,16 +483,20 @@ where } } - fn find_local_neighbors( + fn find_local_neighbors( &self, neighbor_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, timers: &mut DBScanTimers, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { timers.inner_loop_nn_timer.reset_start(); let inner_query_elems = self .converter - .convert_to_bounds_query(&points.quad_points[neighbor_index]); + .convert_to_bounds_query(&points.quad_points.get_ndpoint(neighbor_index)); let local_neighbors: Vec = points .indexed_points .query_ndrange(&inner_query_elems.0, inner_query_elems.1) @@ -585,16 +507,20 @@ where local_neighbors } - fn 
filter_neighbors_inner_loop( + fn filter_neighbors_inner_loop( &self, local_neighbors: Vec, cluster_apex_point_index: usize, current_center_point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, cluster_labels: &ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { let filtered = self.apply_filter_fun( local_neighbors, cluster_apex_point_index, @@ -617,27 +543,37 @@ where ) } - fn filter_by_apex_distance( + fn filter_by_apex_distance( &self, mut neighbors: Vec, apex_point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, timers: &mut DBScanTimers, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { timers.local_neighbor_filter_timer.reset_start(); - let query_point = &points.quad_points[apex_point_index]; - neighbors.retain(|&i| self.is_within_max_distance(&points.quad_points[i], query_point)); + let query_point = &points.quad_points.get_ndpoint(apex_point_index); + neighbors.retain(|&i| { + self.is_within_max_distance(&points.quad_points.get_ndpoint(i), query_point) + }); timers.local_neighbor_filter_timer.stop(false); neighbors } - fn is_extension_core_point( + fn is_extension_core_point( &self, neighbors: &[usize], current_center_point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, timers: &mut DBScanTimers, - ) -> bool { + ) -> bool + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors .iter() @@ -659,13 +595,17 @@ where /// one could pass a function that checks if the chromatograms a high correlation. /// Because two might share the same point in space, intensity is not really /// relevant but co-elution might be critical. 
- fn apply_filter_fun( + fn apply_filter_fun( &self, local_neighbors: Vec, point_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, filter_fun_cache: &mut Option, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { if let Some(cache) = filter_fun_cache { local_neighbors .into_iter() @@ -694,20 +634,25 @@ where neighbors } - fn filter_by_local_intensity_and_distance( + fn filter_by_local_intensity_and_distance( &self, mut neighbors: Vec, neighbor_index: usize, - points: &DBSCANPoints<'a, N, E>, + points: &DBSCANPoints<'a, N, E, PP, QP>, timers: &mut DBScanTimers, - ) -> Vec { + ) -> Vec + where + PP: IntenseAtIndex + ?Sized, + QP: AsNDPointsAtIndex + ?Sized, + { timers.local_neighbor_filter_timer.reset_start(); let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); - let query_point = &points.quad_points[neighbor_index]; + let query_point = &points.quad_points.get_ndpoint(neighbor_index); neighbors.retain(|&i| { let going_downhill = points.prefiltered_peaks[i].intensity() <= query_intensity; - let within_distance = self.is_within_max_distance(&points.quad_points[i], query_point); + let within_distance = + self.is_within_max_distance(&points.quad_points.get_ndpoint(i), query_point); going_downhill && within_distance }); @@ -736,7 +681,7 @@ pub fn _dbscan< T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, >( indexed_points: &'a T, - prefiltered_peaks: &'a Vec, + prefiltered_peaks: &'a [E], quad_points: &'a [NDPoint], min_n: usize, min_intensity: u64, diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index dff2853..6d3abe0 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,3 +1,5 @@ +use std::ops::Index; + #[derive(Debug, Clone, Copy)] pub struct NDBoundary { pub starts: [f32; DIMENSIONALITY], @@ -115,31 +117,54 @@ pub trait QueriableIndexedPoints<'a, const N: usize, T> { ) -> Vec<&'a T>; } -pub 
trait AsNDPoints { +pub trait AsNDPointsAtIndex { fn get_ndpoint( &self, index: usize, - ) -> NDPoint; + ) -> &NDPoint; fn num_ndpoints(&self) -> usize; - fn intensity_at( - &self, - index: usize, - ) -> u64; - fn weight_at( +} + +impl AsNDPointsAtIndex for [NDPoint] { + fn get_ndpoint( &self, index: usize, - ) -> u64 { - self.intensity_at(index) + ) -> &NDPoint { + &self[index] + } + + fn num_ndpoints(&self) -> usize { + self.len() } } -pub trait HasIntensity: Sync { +pub trait HasIntensity: Sync + Send { fn intensity(&self) -> u64; fn weight(&self) -> u64 { self.intensity() } } +pub trait IntenseAtIndex: Index + Send + Sync +where + T: HasIntensity, +{ + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self[index].intensity() + } + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + self[index].weight() + } +} + +impl IntenseAtIndex for [T] where T: HasIntensity {} + pub trait TraceLike> { fn get_mz(&self) -> f64; fn get_intensity(&self) -> u64; From 1403d90a99b6581f5b4c88c7b046f54ce7c08bf7 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 7 Jul 2024 20:55:12 -0700 Subject: [PATCH 13/26] (chore) removed dead code and documented better util --- src/aggregation/ms_denoise.rs | 42 +++-------------------------------- src/utils.rs | 8 +++++++ 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index fc337e5..3cea28f 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -135,44 +135,6 @@ fn _denoise_denseframe( denoised_frame } -fn _denoise_dia_frame( - frame: Frame, - min_n: usize, - min_intensity: u64, - dia_frame_info: &DIAFrameInfo, - ims_converter: &timsrust::Scan2ImConverter, - mz_converter: &timsrust::Tof2MzConverter, - mz_scaling: f64, - max_mz_extension: f64, - ims_scaling: f32, - max_ims_extension: f32, -) -> Vec { - let window_group = dia_frame_info - .get_dia_frame_window_group(frame.index) - .unwrap(); - let frame_windows = dia_frame_info - .split_frame(&frame, window_group) - .expect("Only DIA frames should be passed to this function"); - - frame_windows - .into_iter() - .map(|frame_window| { - denoise_frame_slice( - &frame_window, - ims_converter, - mz_converter, - dia_frame_info, - min_n, - min_intensity, - mz_scaling, - max_mz_extension, - ims_scaling, - max_ims_extension, - ) - }) - .collect::>() -} - fn denoise_frame_slice( frame_window: &FrameSlice, ims_converter: &timsrust::Scan2ImConverter, @@ -323,7 +285,9 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> let frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); let mut out = Vec::with_capacity(frame_window_slices.len()); - for sv in frame_window_slices { + let num_windows = frame_window_slices.len(); + for (i, sv) in frame_window_slices.iter().enumerate() { + info!("Denoising window {}/{}", i + 1, num_windows); let progbar = indicatif::ProgressBar::new(sv.len() as u64); let denoised_elements: Vec = sv .into_par_iter() diff --git a/src/utils.rs b/src/utils.rs index fb99aa6..5f4097a 
100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -94,6 +94,12 @@ impl ContextTimer { } } +/// Applies a function to all elements within a certain distance of each element. +/// +/// Provided a slice of elements (assumed to be sorted by the key function), +/// a key function. For every element in the slice, a function will be applied +/// with the indices of the first and last element within the distance of the +/// current element. pub fn within_distance_apply( elems: &[T], key: &dyn Fn(&T) -> R, @@ -105,6 +111,8 @@ where T: Copy, W: Default + Copy, { + // TODO: rename all internal variables ... they made sense before this + // was a generic function. let mut prefiltered_peaks_bool: Vec = vec![W::default(); elems.len()]; let mut i_left = 0; From 42854f05ee6bb008019286bedbaeb3225c8272c6 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Sat, 13 Jul 2024 07:44:49 -0700 Subject: [PATCH 14/26] (wip) implemented tests and intensity+point generics for frame slice --- src/aggregation/dbscan/runner.rs | 73 ++--- src/ms/frames.rs | 464 ++++++++++++++++++++++++++++--- src/space/space_generics.rs | 30 +- 3 files changed, 485 insertions(+), 82 deletions(-) diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index f01f326..5f1ade1 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -2,12 +2,11 @@ use crate::space::space_generics::NDPointConverter; use crate::space::space_generics::{ AsNDPointsAtIndex, HasIntensity, IntenseAtIndex, NDPoint, QueriableIndexedPoints, }; +use std::marker::PhantomData; + use crate::utils; use indicatif::ProgressIterator; -use rayon::prelude::*; -use serde_json::value::Index; - use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; @@ -219,14 +218,15 @@ struct DBSCANRunner<'a, const N: usize, C, E> { struct DBSCANPoints<'a, const N: usize, E, PP, QP> where - E: HasIntensity, - PP: IntenseAtIndex + std::ops::Index + std::marker::Send 
+ ?Sized, + PP: IntenseAtIndex + std::marker::Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + E: HasIntensity, { prefiltered_peaks: &'a PP, // &'a Vec, intensity_sorted_indices: &'a Vec<(usize, u64)>, indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), quad_points: &'a QP, // [NDPoint], + _marker: PhantomData, } impl<'a, 'b: 'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> @@ -242,13 +242,16 @@ where quad_points: &'b QP, //[NDPoint], // trait impl AsNDPointAtIndex> ) -> ClusterLabels where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + Sync + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { let usize_filterfun = match self.filter_fun { Some(filterfun) => { let cl = |a: &usize, b: &usize| { - filterfun(&prefiltered_peaks[*a], &prefiltered_peaks[*b]) + filterfun( + &prefiltered_peaks.get_intense_at_index(*a), + &prefiltered_peaks.get_intense_at_index(*b), + ) }; let bind = Some(cl); bind @@ -263,6 +266,7 @@ where intensity_sorted_indices, indexed_points, quad_points, + _marker: PhantomData, }; // Q: if filter fun is required ... why is it an option? 
state = self.process_points(state, &points); @@ -293,7 +297,7 @@ where points: &DBSCANPoints<'a, N, E, PP, QP>, ) -> DBSCANRunnerState where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { let my_progbar = @@ -326,7 +330,7 @@ where timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { @@ -364,13 +368,12 @@ where cc_metrics: &mut CandidateCountMetrics, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { timers.outer_loop_nn_timer.reset_start(); - let query_elems = self - .converter - .convert_to_bounds_query(&points.quad_points.get_ndpoint(point_index)); + let binding = points.quad_points.get_ndpoint(point_index); + let query_elems = self.converter.convert_to_bounds_query(&binding); let mut candidate_neighbors = points .indexed_points .query_ndrange(&query_elems.0, query_elems.1) @@ -391,8 +394,8 @@ where Some(res) => res, None => { let res = (self.filter_fun.unwrap())( - &points.prefiltered_peaks[*i], - &points.prefiltered_peaks[point_index], + &points.prefiltered_peaks.get_intense_at_index(*i), + &points.prefiltered_peaks.get_intense_at_index(point_index), ); tmp.set(*i, point_index, res); res @@ -419,12 +422,12 @@ where timers: &mut DBScanTimers, ) -> bool where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, { timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() - .map(|i| prefiltered_peaks[*i].intensity()) + .map(|i| prefiltered_peaks.intensity_at_index(*i)) .sum::(); timers.outer_intensity_calculation.stop(false); return neighbor_intensity_total >= self.min_intensity; @@ -439,7 +442,7 @@ where filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) where - PP: IntenseAtIndex + ?Sized, + 
PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { cluster_labels.set_new_cluster(apex_point_index); @@ -490,13 +493,12 @@ where timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { timers.inner_loop_nn_timer.reset_start(); - let inner_query_elems = self - .converter - .convert_to_bounds_query(&points.quad_points.get_ndpoint(neighbor_index)); + let binding = points.quad_points.get_ndpoint(neighbor_index); + let inner_query_elems = self.converter.convert_to_bounds_query(&binding); let local_neighbors: Vec = points .indexed_points .query_ndrange(&inner_query_elems.0, inner_query_elems.1) @@ -518,7 +520,7 @@ where timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { let filtered = self.apply_filter_fun( @@ -551,7 +553,7 @@ where timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { timers.local_neighbor_filter_timer.reset_start(); @@ -571,17 +573,18 @@ where timers: &mut DBScanTimers, ) -> bool where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors .iter() - .map(|&i| points.prefiltered_peaks[i].intensity()) + .map(|&i| points.prefiltered_peaks.intensity_at_index(i)) .sum(); - neighbor_intensity_total += - points.prefiltered_peaks[current_center_point_index].intensity(); + neighbor_intensity_total += points + .prefiltered_peaks + .intensity_at_index(current_center_point_index); timers.inner_intensity_calculation.stop(false); neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity @@ -603,7 +606,7 @@ where filter_fun_cache: &mut Option, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + 
Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { if let Some(cache) = filter_fun_cache { @@ -612,8 +615,8 @@ where .filter(|&i| { cache.get(i, point_index).unwrap_or_else(|| { let res = (self.filter_fun.unwrap())( - &points.prefiltered_peaks[i], - &points.prefiltered_peaks[point_index], + &points.prefiltered_peaks.get_intense_at_index(i), + &points.prefiltered_peaks.get_intense_at_index(point_index), ); cache.set(i, point_index, res); res @@ -642,15 +645,15 @@ where timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, { timers.local_neighbor_filter_timer.reset_start(); - let query_intensity = points.prefiltered_peaks[neighbor_index].intensity(); + let query_intensity = points.prefiltered_peaks.intensity_at_index(neighbor_index); let query_point = &points.quad_points.get_ndpoint(neighbor_index); neighbors.retain(|&i| { - let going_downhill = points.prefiltered_peaks[i].intensity() <= query_intensity; + let going_downhill = points.prefiltered_peaks.intensity_at_index(i) <= query_intensity; let within_distance = self.is_within_max_distance(&points.quad_points.get_ndpoint(i), query_point); going_downhill && within_distance @@ -677,7 +680,7 @@ pub fn _dbscan< 'a, const N: usize, C: NDPointConverter, - E: Sync + HasIntensity, + E: Sync + Copy + HasIntensity, T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, >( indexed_points: &'a T, diff --git a/src/ms/frames.rs b/src/ms/frames.rs index b3f93dc..0698081 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -1,3 +1,8 @@ +use std::fmt; +use std::ops::Index; +use std::slice::SliceIndex; + +use rand::seq::index; pub use timsrust::Frame; pub use timsrust::FrameType; pub use timsrust::{ @@ -5,7 +10,8 @@ pub use timsrust::{ }; use crate::ms::tdf::{DIAFrameInfo, ScanRange}; -use crate::space::space_generics::HasIntensity; +use crate::space::space_generics::NDPoint; +use crate::space::space_generics::{AsNDPointsAtIndex, 
HasIntensity, IntenseAtIndex}; use log::info; @@ -30,6 +36,25 @@ pub struct RawTimsPeak { pub scan_index: usize, } +#[derive(Debug, Clone, Copy)] +pub struct RawTimsPeakReference<'a> { + pub intensity: &'a u32, + pub tof_index: &'a u32, + pub scan_index: &'a usize, +} + +impl HasIntensity for RawTimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + +impl<'a> HasIntensity for RawTimsPeakReference<'a> { + fn intensity(&self) -> u64 { + *self.intensity as u64 + } +} + fn _check_peak_sanity(peak: &TimsPeak) { debug_assert!(peak.intensity > 0); debug_assert!(peak.mz > 0.); @@ -44,6 +69,58 @@ pub enum SortingOrder { Intensity, } +#[derive(Debug, Clone, Copy)] +pub enum ScanNumberType { + Global(usize), + Local(usize), +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct MaxBounds { + requested: usize, + limit: usize, + local_limit: usize, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ScanOutOfBoundsError { + Global(MaxBounds), + Local(MaxBounds), +} + +impl ScanOutOfBoundsError { + pub fn local_limit(&self) -> usize { + match self { + ScanOutOfBoundsError::Global(x) => x.local_limit, + ScanOutOfBoundsError::Local(x) => x.local_limit, + } + } +} + +impl fmt::Display for ScanOutOfBoundsError { + fn fmt( + &self, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match self { + ScanOutOfBoundsError::Global(x) => { + write!( + f, + "Global scan number out of bounds. Requested: {}, Limit: {}", + x.requested, x.limit + ) + }, + ScanOutOfBoundsError::Local(x) => { + write!( + f, + "Local scan number out of bounds. Requested: {}, Limit: {}", + x.requested, x.limit + ) + }, + } + } +} + /// Information on the context of a window in a frame. 
/// /// This adds to a frame slice the context of the what isolation was used @@ -64,8 +141,8 @@ pub trait FramePointTolerance { ) -> (u32, u32); fn scan_range( &self, - scan_index: usize, - ) -> (usize, usize); + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType); } struct AbsoluteFramePointTolerance { @@ -87,13 +164,24 @@ impl FramePointTolerance for AbsoluteFramePointTolerance { fn scan_range( &self, - scan_index: usize, - ) -> (usize, usize) { - let scan_tolerance = self.scan_tolerance; - ( - scan_index.saturating_sub(scan_tolerance), - scan_index + scan_tolerance, - ) + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType) { + match scan_index { + ScanNumberType::Global(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Global(x.saturating_sub(scan_tolerance)), + ScanNumberType::Global(x + scan_tolerance), + ) + }, + ScanNumberType::Local(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Local(x.saturating_sub(scan_tolerance)), + ScanNumberType::Local(x + scan_tolerance), + ) + }, + } } } @@ -198,7 +286,6 @@ impl<'a> FrameSlice<'a> { slice_window_info: Option, ) -> FrameSlice<'a> { let scan_offsets = &frame.scan_offsets[scan_start..=scan_end]; - let scan_start = scan_offsets[0]; let indprt_start = scan_offsets[0]; let indptr_end = *scan_offsets.last().expect("Scan range is empty"); @@ -237,16 +324,18 @@ impl<'a> FrameSlice<'a> { &self, local_index: usize, ) -> usize { + debug_assert!(local_index < self.tof_indices.len()); let search_val = self.scan_offsets[0] + local_index; let loc = self .scan_offsets .binary_search_by(|x| x.partial_cmp(&search_val).unwrap()); + let local_scan_index = match loc { Ok(mut x) => { - while x > 0 && self.scan_offsets[x - 1] >= search_val { - x -= 1; + while self.scan_offsets[x] == search_val { + x += 1; } - x + x - 1 }, Err(x) => x - 1, }; @@ -257,8 +346,8 @@ impl<'a> FrameSlice<'a> { let mut scan_numbers = Vec::with_capacity(self.tof_indices.len()); let 
curr_scan = self.scan_start; - for (scan_index, index_offset) in self.scan_offsets[1..].iter().enumerate() { - let num_tofs = index_offset - self.scan_offsets[scan_index]; + for (scan_index, index_offsets) in self.scan_offsets.windows(2).enumerate() { + let num_tofs = index_offsets[1] - index_offsets[0]; scan_numbers.extend(vec![curr_scan + scan_index; num_tofs]); } @@ -270,21 +359,70 @@ impl<'a> FrameSlice<'a> { last_scan = *scan; } - debug_assert!(scan_numbers[0] == self.scan_start); + // debug_assert_eq!(scan_numbers[0], self.scan_start); + debug_assert!(scan_numbers[0] >= self.scan_start); debug_assert!(scan_numbers.len() == self.tof_indices.len()); - debug_assert_eq!( - scan_numbers.last().unwrap(), - &(self.scan_offsets.len() - 1 + self.scan_start) + debug_assert!( + scan_numbers.last().unwrap() <= &(self.scan_offsets.len() - 1 + self.scan_start) ); } scan_numbers } + /// Get the tof indices and intensities at a scan number. + /// + /// Returns a tuple. + /// The first element is another tuple of the tof indices and intensities at the scan number. + /// The second element is the offset of the first local tof index in the scan. 
+ /// pub fn tof_intensities_at_scan( &self, - scan_number: usize, + scan_number: ScanNumberType, + ) -> Result<((&[u32], &[u32]), usize), ScanOutOfBoundsError> { + let local_scan_number = self.scan_number_to_local(scan_number)?; + Ok(self.tof_intensities_at_local_scan(local_scan_number)) + } + + pub fn scan_number_to_local( + &self, + scan_number: ScanNumberType, + ) -> Result { + match scan_number { + ScanNumberType::Global(x) => { + if x < self.scan_start { + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: x, + limit: self.scan_start, + local_limit: 0, + })) + } else if x >= self.scan_start + self.scan_offsets.len() { + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: x, + limit: self.scan_start + self.scan_offsets.len(), + local_limit: self.scan_offsets.len() - 1, + })) + } else { + Ok(x - self.scan_start) + } + }, + ScanNumberType::Local(x) => { + if x >= self.scan_offsets.len() { + Err(ScanOutOfBoundsError::Local(MaxBounds { + requested: x, + limit: self.scan_offsets.len(), + local_limit: self.scan_offsets.len() - 1, + })) + } else { + Ok(x) + } + }, + } + } + + fn tof_intensities_at_local_scan( + &self, + scan_index: usize, ) -> ((&[u32], &[u32]), usize) { - let scan_index = scan_number - self.scan_start; let offset_offset = self.scan_offsets[0]; let scan_start = self.scan_offsets[scan_index] - offset_offset; let scan_end = self.scan_offsets[scan_index + 1] - offset_offset; @@ -293,17 +431,18 @@ impl<'a> FrameSlice<'a> { ((tof_indices, intensities), scan_start) } - pub fn matching_range_at_scan( + pub fn tof_range_in_tolerance_at_scan( &self, tof_index: i32, - scan_number: usize, + scan_number: ScanNumberType, tolerance: &T, - ) -> Option<(Range, usize)> + ) -> Result, ScanOutOfBoundsError> where T: FramePointTolerance, { // TODO implement later a two pointer approach for sorted slices of tof indices. 
- let ((tof_indices, _), start_indptr) = self.tof_intensities_at_scan(scan_number); + let ((tof_indices, _), local_tof_index_start) = + self.tof_intensities_at_scan(scan_number)?; let tof_len = tof_indices.len(); let (start, end) = tolerance.tof_index_range(tof_index as u32); let tof_index_start = tof_indices.binary_search_by(|x| x.partial_cmp(&start).unwrap()); @@ -319,13 +458,14 @@ impl<'a> FrameSlice<'a> { }; if tof_index_start >= tof_len { - return None; + return Ok(None); }; let tof_index_end = match tof_index_end { - Ok(x) => x, + Ok(x) => x, // On this branch we dont add more bc tof indices are unique. Err(mut x) => { while x < tof_len && tof_indices[x] < end { + println!("tof_indices[x]: {}, x: {}", tof_indices[x], x); x += 1; } x @@ -333,16 +473,19 @@ impl<'a> FrameSlice<'a> { }; if tof_index_end > tof_index_start { - Some(((tof_index_start, tof_index_end), start_indptr)) + Ok(Some(( + tof_index_start + local_tof_index_start, + tof_index_end + local_tof_index_start, + ))) } else { - None + Ok(None) } } pub fn matching_rangeset( &self, tof_index: i32, - scan_number: usize, + scan_number: ScanNumberType, tolerance: &T, ) -> Option where @@ -354,14 +497,27 @@ impl<'a> FrameSlice<'a> { }; let scan_range = tolerance.scan_range(scan_number); - for scan_number in scan_range.0..scan_range.1 { - if let Some(range_offset) = - self.matching_range_at_scan(tof_index, scan_number, tolerance) - { - ranges.ranges.push(( - range_offset.0 .0 - range_offset.1, - range_offset.0 .1 - range_offset.1, - )); + let local_start = match self.scan_number_to_local(scan_range.0) { + Ok(x) => x, + Err(x) => x.local_limit(), + }; + let local_end = match self.scan_number_to_local(scan_range.1) { + Ok(x) => x, + Err(x) => x.local_limit(), + }; + + for scan_number in local_start..local_end { + let tmp = self.tof_range_in_tolerance_at_scan( + tof_index, + ScanNumberType::Local(scan_number), + tolerance, + ); + + match tmp { + Ok(Some(range_offset)) => { + ranges.ranges.push(range_offset); + 
}, + _ => (), } } @@ -377,6 +533,236 @@ impl<'a> FrameSlice<'a> { } } +// Tests for the FrameSlice +#[cfg(test)] +mod tests { + use super::*; + + fn sample_frame() -> Frame { + Frame { + index: 0, + scan_offsets: vec![0, 0, 0, 0, 0, 3, 5, 6], + tof_indices: vec![100, 101, 102, 10, 20, 30], + intensities: vec![123, 111, 12, 3, 4, 1], + rt: 65.34, + frame_type: FrameType::MS1, + } + } + + #[test] + fn test_frame_slice() { + let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + + assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); + assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); + assert_eq!(frame_slice.intensities, &[123, 111, 12]); + assert_eq!(frame_slice.parent_frame_index, 0); + assert_eq!(frame_slice.rt, 65.34); + assert_eq!(frame_slice.frame_type, FrameType::MS1); + assert_eq!(frame_slice.scan_start, 0); + } + + #[test] + fn test_global_scan_at_index() { + let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + + assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); + assert_eq!(frame_slice.global_scan_at_index(0), 4); + assert_eq!(frame_slice.global_scan_at_index(1), 4); + assert_eq!(frame_slice.global_scan_at_index(2), 4); + } + + #[test] + #[should_panic] + fn test_global_scan_at_index_oob_fails() { + // these should fail ... test that it fails. 
+ let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); + frame_slice.global_scan_at_index(3); + } + + #[test] + fn test_explode_scan_numbers() { + let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); + assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); + assert_eq!(frame_slice.explode_scan_numbers(), vec![4, 4, 4]); + } + + #[test] + fn test_tof_intensities_at_scan() { + let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + + assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); + assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); + assert_eq!(frame_slice.intensities, &[123, 111, 12]); + + let arg_expects = vec![ + ( + ScanNumberType::Global(4), + Ok(((vec![100, 101, 102], vec![123, 111, 12]), 0)), + ), + ( + ScanNumberType::Local(1), + Ok(((vec![100, 101, 102], vec![123, 111, 12]), 0)), + ), + ( + ScanNumberType::Global(2), + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: 2, + limit: 3, + local_limit: 0, + })), + ), + (ScanNumberType::Global(3), Ok(((Vec::new(), Vec::new()), 0))), + (ScanNumberType::Local(0), Ok(((Vec::new(), Vec::new()), 0))), + ]; + + for (arg, expect) in arg_expects { + println!("arg: {:?}", arg); + let val = frame_slice.tof_intensities_at_scan(arg); + match (val, expect) { + (Ok(x), Ok(y)) => { + assert_eq!(x.0 .0, y.0 .0); + assert_eq!(x.0 .1, y.0 .1); + assert_eq!(x.1, y.1); + }, + (Err(x), Err(y)) => { + assert_eq!(x, y); + }, + (Ok(x), Err(y)) => panic!("Mismatch {:?} vs {:?}", x, y), + (Err(x), Ok(y)) => panic!("Mismatch {:?} vs {:?}", x, y), + } + } + } + + #[test] + fn test_tof_range_in_tolerance_at_scan() { + let frame = sample_frame(); + let frame_slice = FrameSlice::slice_frame(&frame, 3, 7, None); + + assert_eq!(frame_slice.tof_indices, &[100, 101, 102, 10, 20, 30]); + 
assert_eq!(frame_slice.scan_offsets, &[0, 0, 3, 5, 6]); + assert_eq!(frame_slice.intensities, &[123, 111, 12, 3, 4, 1]); + + let tolerance = AbsoluteFramePointTolerance { + tof_index_tolerance: 1, + scan_tolerance: 1, + }; + + let param_expect_vec = vec![ + (10, ScanNumberType::Global(5), Ok(Some((3, 4)))), + (1, ScanNumberType::Global(5), Ok(None)), + (10, ScanNumberType::Global(4), Ok(None)), + (100, ScanNumberType::Global(4), Ok(Some((0, 1)))), + (101, ScanNumberType::Global(4), Ok(Some((0, 2)))), + (102, ScanNumberType::Global(4), Ok(Some((1, 3)))), + (100, ScanNumberType::Global(3), Ok(None)), + (100, ScanNumberType::Global(5), Ok(None)), + ( + 100, + ScanNumberType::Global(2), + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: 2, + limit: 3, + local_limit: 0, + })), + ), + ( + 100, + ScanNumberType::Global(1), + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: 1, + limit: 3, + local_limit: 0, + })), + ), + ( + 100, + ScanNumberType::Global(0), + Err(ScanOutOfBoundsError::Global(MaxBounds { + requested: 0, + limit: 3, + local_limit: 0, + })), + ), + ]; + for (tof_index, scan_number, expect) in param_expect_vec { + println!("tof_index: {}, scan_number: {:?}", tof_index, scan_number); + let val = + frame_slice.tof_range_in_tolerance_at_scan(tof_index, scan_number, &tolerance); + + match (val, expect) { + (Ok(Some(x)), Ok(Some(y))) => { + assert_eq!(x, y); + }, + (Ok(None), Ok(None)) => (), + (Err(x), Err(y)) => { + assert_eq!(x, y); + }, + (Ok(x), Ok(None)) => panic!("Mismatch {:?} vs {:?}", x, expect), + (Ok(None), Ok(x)) => panic!("Mismatch {:?} vs {:?}", x, expect), + (Err(x), Ok(y)) => panic!("Mismatch {:?} vs {:?}", x, y), + (Ok(x), Err(y)) => panic!("Mismatch {:?} vs {:?}", x, y), + } + } + } + + fn sample_ms2_frame() -> Frame { + Frame { + index: 0, + scan_offsets: vec![0, 0, 3, 5, 6], + tof_indices: vec![100, 101, 102, 10, 20, 30], + intensities: vec![123, 111, 12, 3, 4, 1], + rt: 65.34, + frame_type: 
FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), + } + } +} + +impl<'a> IntenseAtIndex for FrameSlice<'a> { + fn get_intense_at_index( + &self, + index: usize, + ) -> RawTimsPeak { + let intensity = self.intensities[index]; + let tof_index = self.tof_indices[index]; + let scan_index = self.global_scan_at_index(index); + + let out = RawTimsPeak { + intensity, + tof_index, + scan_index, + }; + + out + } +} + +impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<3> { + let intensity = self.intensities[index]; + let tof_index = self.tof_indices[index]; + let scan_index = self.global_scan_at_index(index); + + NDPoint { + values: [tof_index as f32, scan_index as f32, intensity as f32], + } + } + + fn num_ndpoints(&self) -> usize { + self.intensities.len() + } +} + #[derive(Debug, Clone)] pub struct DenseFrame { pub raw_peaks: Vec, diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 6d3abe0..16becdb 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -99,7 +99,7 @@ impl NDBoundary { // #[derive(Debug, Clone, Copy)] // Oddly enough ... adding copy makes it slower ... 
-#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub struct NDPoint { pub values: [f32; DIMENSIONALITY], } @@ -121,7 +121,7 @@ pub trait AsNDPointsAtIndex { fn get_ndpoint( &self, index: usize, - ) -> &NDPoint; + ) -> NDPoint; fn num_ndpoints(&self) -> usize; } @@ -129,8 +129,8 @@ impl AsNDPointsAtIndex for [NDPoint] { fn get_ndpoint( &self, index: usize, - ) -> &NDPoint { - &self[index] + ) -> NDPoint { + self[index] } fn num_ndpoints(&self) -> usize { @@ -145,7 +145,7 @@ pub trait HasIntensity: Sync + Send { } } -pub trait IntenseAtIndex: Index + Send + Sync +pub trait IntenseAtIndex where T: HasIntensity, { @@ -153,17 +153,31 @@ where &self, index: usize, ) -> u64 { - self[index].intensity() + self.get_intense_at_index(index).intensity() } fn weight_at_index( &self, index: usize, ) -> u64 { - self[index].weight() + self.get_intense_at_index(index).weight() } + fn get_intense_at_index( + &self, + index: usize, + ) -> T; } -impl IntenseAtIndex for [T] where T: HasIntensity {} +impl IntenseAtIndex for [T] +where + T: HasIntensity + Copy, +{ + fn get_intense_at_index( + &self, + index: usize, + ) -> T { + self[index] + } +} pub trait TraceLike> { fn get_mz(&self) -> f64; From 442710080dfded1cb47a067c8406e3a31abcfe09 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 15 Jul 2024 00:17:11 -0700 Subject: [PATCH 15/26] (wip) migration from slices of elements to indexable traits --- src/aggregation/aggregators.rs | 6 +- src/aggregation/dbscan/dbscan.rs | 2 +- src/aggregation/dbscan/runner.rs | 155 +++++++++++++++++++------------ src/ms/frames.rs | 41 +++++--- src/space/space_generics.rs | 92 ++++++++++-------- 5 files changed, 179 insertions(+), 117 deletions(-) diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index fb44b40..f5c2413 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -79,7 +79,7 @@ impl ClusterAggregator for TimsPeakAggregator { } pub fn aggregate_clusters< - T: HasIntensity + Send + Clone + Copy, + T: Send + Clone + Copy, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, @@ -122,7 +122,7 @@ pub fn aggregate_clusters< } fn parallel_aggregate_clusters< - T: HasIntensity + Send + Clone + Copy, + T: Send + Clone + Copy, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, @@ -210,7 +210,7 @@ fn parallel_aggregate_clusters< } fn serial_aggregate_clusters< - T: HasIntensity + Send + Clone + Copy, + T: Send + Clone + Copy, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index abbedf1..3bc2cd1 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -15,7 +15,7 @@ use crate::aggregation::dbscan::runner::_dbscan; fn reassign_centroid< 'a, const N: usize, - T: HasIntensity + Send + Clone + Copy, + T: Send + Clone + Copy, C: NDPointConverter, I: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, G: Sync + Send + ClusterAggregator, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 5f1ade1..79d9522 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,6 +1,7 @@ use 
crate::space::space_generics::NDPointConverter; use crate::space::space_generics::{ - AsNDPointsAtIndex, HasIntensity, IntenseAtIndex, NDPoint, QueriableIndexedPoints, + convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, + NDPoint, QueriableIndexedPoints, }; use std::marker::PhantomData; @@ -207,51 +208,78 @@ impl DBSCANRunnerState { } } -struct DBSCANRunner<'a, const N: usize, C, E> { +struct DBSCANRunner<'a, const N: usize, D> { min_n: usize, min_intensity: u64, - filter_fun: Option<&'a (dyn Fn(&E, &E) -> bool + Send + Sync)>, - converter: C, + filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, progress: bool, max_extension_distances: &'a [f32; N], } -struct DBSCANPoints<'a, const N: usize, E, PP, QP> +struct DBSCANPoints<'a, const N: usize, PP, QP, D, E> where - PP: IntenseAtIndex + std::marker::Send + ?Sized, + PP: IntenseAtIndex + std::marker::Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, - E: HasIntensity, + D: DistantAtIndex, { prefiltered_peaks: &'a PP, // &'a Vec, intensity_sorted_indices: &'a Vec<(usize, u64)>, indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), quad_points: &'a QP, // [NDPoint], - _marker: PhantomData, + dist: D, + _phantom_metric: PhantomData, } -impl<'a, 'b: 'a, const N: usize, C, E> DBSCANRunner<'a, N, C, E> +impl<'a, const N: usize, PP, QQ, D, E> DBSCANPoints<'a, N, PP, QQ, D, E> where - C: NDPointConverter, - E: Sync + HasIntensity, + PP: IntenseAtIndex + std::marker::Send + ?Sized, + QQ: AsNDPointsAtIndex + ?Sized, + D: DistantAtIndex, +{ + fn get_intensity_at_index( + &self, + index: usize, + ) -> u64 { + self.prefiltered_peaks.intensity_at_index(index) + } + + fn get_ndpoint_at_index( + &self, + index: usize, + ) -> NDPoint { + self.quad_points.get_ndpoint(index) + } + + fn get_distance_at_indices( + &self, + a: usize, + b: usize, + ) -> E { + self.dist.distance_at_indices(a, b) + } +} + +impl<'a, 'b: 'a, const N: usize, D> DBSCANRunner<'a, N, D> 
+where + D: Sync, { - fn run( + fn run( &self, prefiltered_peaks: &'b PP, // Vec, // trait impl Index intensity_sorted_indices: &'b Vec<(usize, u64)>, indexed_points: &'b (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), quad_points: &'b QP, //[NDPoint], // trait impl AsNDPointAtIndex> + distance_calculator: DAI, ) -> ClusterLabels where - PP: IntenseAtIndex + Send + Sync + ?Sized, + PP: IntenseAtIndex + Send + Sync + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { let usize_filterfun = match self.filter_fun { Some(filterfun) => { let cl = |a: &usize, b: &usize| { - filterfun( - &prefiltered_peaks.get_intense_at_index(*a), - &prefiltered_peaks.get_intense_at_index(*b), - ) + filterfun(&distance_calculator.distance_at_indices(*a, *b)) }; let bind = Some(cl); bind @@ -261,12 +289,13 @@ where let mut state = DBSCANRunnerState::new(intensity_sorted_indices.len(), usize_filterfun); - let points: DBSCANPoints = DBSCANPoints { + let points: DBSCANPoints = DBSCANPoints { prefiltered_peaks, intensity_sorted_indices, indexed_points, quad_points, - _marker: PhantomData, + dist: distance_calculator, + _phantom_metric: PhantomData, }; // Q: if filter fun is required ... why is it an option? state = self.process_points(state, &points); @@ -291,14 +320,15 @@ where state.cluster_labels } - fn process_points( + fn process_points( &self, mut state: DBSCANRunnerState, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, ) -> DBSCANRunnerState where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { let my_progbar = state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); @@ -321,17 +351,18 @@ where } /// This method gets applied to every point in decreasing intensity order. 
- fn process_single_point( + fn process_single_point( &self, point_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { return; @@ -359,21 +390,22 @@ where ); } - fn find_main_loop_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { timers.outer_loop_nn_timer.reset_start(); let binding = points.quad_points.get_ndpoint(point_index); - let query_elems = self.converter.convert_to_bounds_query(&binding); + let query_elems = convert_to_bounds_query(&binding); let mut candidate_neighbors = points .indexed_points .query_ndrange(&query_elems.0, query_elems.1) @@ -394,8 +426,7 @@ where Some(res) => res, None => { let res = (self.filter_fun.unwrap())( - &points.prefiltered_peaks.get_intense_at_index(*i), - &points.prefiltered_peaks.get_intense_at_index(point_index), + &points.get_distance_at_indices(*i, point_index), ); tmp.set(*i, point_index, res); res @@ -422,7 +453,7 @@ where timers: &mut DBScanTimers, ) -> bool where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, { timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors @@ -433,17 +464,18 @@ where return neighbor_intensity_total >= self.min_intensity; } - fn main_loop_expand_cluster( + fn 
main_loop_expand_cluster( &self, apex_point_index: usize, neighbors: Vec, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { cluster_labels.set_new_cluster(apex_point_index); let mut seed_set: Vec = neighbors; @@ -486,19 +518,20 @@ where } } - fn find_local_neighbors( + fn find_local_neighbors( &self, neighbor_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { timers.inner_loop_nn_timer.reset_start(); let binding = points.quad_points.get_ndpoint(neighbor_index); - let inner_query_elems = self.converter.convert_to_bounds_query(&binding); + let inner_query_elems = convert_to_bounds_query(&binding); let local_neighbors: Vec = points .indexed_points .query_ndrange(&inner_query_elems.0, inner_query_elems.1) @@ -509,19 +542,20 @@ where local_neighbors } - fn filter_neighbors_inner_loop( + fn filter_neighbors_inner_loop( &self, local_neighbors: Vec, cluster_apex_point_index: usize, current_center_point_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, cluster_labels: &ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { let filtered = self.apply_filter_fun( local_neighbors, @@ -545,16 +579,17 @@ where ) } - fn filter_by_apex_distance( + fn filter_by_apex_distance( &self, mut neighbors: Vec, apex_point_index: usize, - 
points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { timers.local_neighbor_filter_timer.reset_start(); let query_point = &points.quad_points.get_ndpoint(apex_point_index); @@ -565,16 +600,17 @@ where neighbors } - fn is_extension_core_point( + fn is_extension_core_point( &self, neighbors: &[usize], current_center_point_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, timers: &mut DBScanTimers, ) -> bool where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors @@ -598,16 +634,17 @@ where /// one could pass a function that checks if the chromatograms a high correlation. /// Because two might share the same point in space, intensity is not really /// relevant but co-elution might be critical. 
- fn apply_filter_fun( + fn apply_filter_fun( &self, local_neighbors: Vec, point_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, filter_fun_cache: &mut Option, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { if let Some(cache) = filter_fun_cache { local_neighbors @@ -615,8 +652,7 @@ where .filter(|&i| { cache.get(i, point_index).unwrap_or_else(|| { let res = (self.filter_fun.unwrap())( - &points.prefiltered_peaks.get_intense_at_index(i), - &points.prefiltered_peaks.get_intense_at_index(point_index), + &points.get_distance_at_indices(i, point_index), ); cache.set(i, point_index, res); res @@ -637,16 +673,17 @@ where neighbors } - fn filter_by_local_intensity_and_distance( + fn filter_by_local_intensity_and_distance( &self, mut neighbors: Vec, neighbor_index: usize, - points: &DBSCANPoints<'a, N, E, PP, QP>, + points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, + PP: IntenseAtIndex + Send + ?Sized, QP: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync, { timers.local_neighbor_filter_timer.reset_start(); let query_intensity = points.prefiltered_peaks.intensity_at_index(neighbor_index); @@ -682,6 +719,7 @@ pub fn _dbscan< C: NDPointConverter, E: Sync + Copy + HasIntensity, T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, + D: Send + Sync, >( indexed_points: &'a T, prefiltered_peaks: &'a [E], @@ -689,15 +727,13 @@ pub fn _dbscan< min_n: usize, min_intensity: u64, intensity_sorted_indices: &'a Vec<(usize, u64)>, - filter_fun: Option<&'a (dyn Fn(&E, &E) -> bool + Send + Sync)>, - converter: C, + filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, progress: bool, max_extension_distances: &'a [f32; N], ) -> ClusterLabels { let runner = DBSCANRunner { min_n, min_intensity, - converter, progress, 
filter_fun: filter_fun, max_extension_distances, @@ -708,6 +744,7 @@ pub fn _dbscan< intensity_sorted_indices, indexed_points, quad_points, + indexed_points, ); cluster_labels diff --git a/src/ms/frames.rs b/src/ms/frames.rs index 0698081..3db5c6c 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames.rs @@ -725,23 +725,36 @@ mod tests { } } -impl<'a> IntenseAtIndex for FrameSlice<'a> { - fn get_intense_at_index( +impl<'a> IntenseAtIndex for FrameSlice<'a> { + fn intensity_at_index( &self, index: usize, - ) -> RawTimsPeak { - let intensity = self.intensities[index]; - let tof_index = self.tof_indices[index]; - let scan_index = self.global_scan_at_index(index); - - let out = RawTimsPeak { - intensity, - tof_index, - scan_index, - }; - - out + ) -> u64 { + self.intensities[index] as u64 } + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + self.intensities[index] as u64 + } + + // fn get_intense_at_index( + // &self, + // index: usize, + // ) -> RawTimsPeak { + // let intensity = self.intensities[index]; + // let tof_index = self.tof_indices[index]; + // let scan_index = self.global_scan_at_index(index); + + // let out = RawTimsPeak { + // intensity, + // tof_index, + // scan_index, + // }; + + // out + // } } impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 16becdb..3d5ba83 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,5 +1,3 @@ -use std::ops::Index; - #[derive(Debug, Clone, Copy)] pub struct NDBoundary { pub starts: [f32; DIMENSIONALITY], @@ -145,37 +143,51 @@ pub trait HasIntensity: Sync + Send { } } -pub trait IntenseAtIndex -where - T: HasIntensity, -{ +pub trait IntenseAtIndex { fn intensity_at_index( &self, index: usize, + ) -> u64; + fn weight_at_index( + &self, + index: usize, ) -> u64 { - self.get_intense_at_index(index).intensity() + self.intensity_at_index(index) } - fn weight_at_index( +} + +impl IntenseAtIndex for [T] +where + T: 
HasIntensity + Copy, +{ + fn intensity_at_index( &self, index: usize, ) -> u64 { - self.get_intense_at_index(index).weight() + self[index].intensity() } - fn get_intense_at_index( +} + +pub trait DistantAtIndex { + fn distance_at_indices( &self, index: usize, + other: usize, ) -> T; } -impl IntenseAtIndex for [T] -where - T: HasIntensity + Copy, -{ - fn get_intense_at_index( +impl DistantAtIndex for [NDPoint] { + fn distance_at_indices( &self, index: usize, - ) -> T { - self[index] + other: usize, + ) -> f32 { + let mut sum = 0.0; + for i in 0..N { + let diff = self[index].values[i] - self[other].values[i]; + sum += diff * diff; + } + sum.sqrt() } } @@ -203,27 +215,27 @@ pub trait NDPointConverter { let boundary = NDBoundary::from_ndpoints(&points); (points, boundary) } - fn convert_to_bounds_query<'a>( - &self, - point: &'a NDPoint, - ) -> (NDBoundary, Option<&'a NDPoint>) { - let bounds = NDBoundary::new( - point - .values - .iter() - .map(|x| *x - 1.) - .collect::>() - .try_into() - .unwrap(), - point - .values - .iter() - .map(|x| *x + 1.) - .collect::>() - .try_into() - .unwrap(), - ); - - (bounds, Some(point)) - } +} + +pub fn convert_to_bounds_query<'a, const D: usize>( + point: &'a NDPoint +) -> (NDBoundary, Option<&'a NDPoint>) { + let bounds = NDBoundary::new( + point + .values + .iter() + .map(|x| *x - 1.) + .collect::>() + .try_into() + .unwrap(), + point + .values + .iter() + .map(|x| *x + 1.) + .collect::>() + .try_into() + .unwrap(), + ); + + (bounds, Some(point)) } From 5daf5f3a36637eeee3c8d0a40a1b2a735ba9d9f8 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Mon, 15 Jul 2024 20:07:27 -0700 Subject: [PATCH 16/26] (finished) migration from slices of elements to indexable traits --- README.md | 9 +- src/aggregation/dbscan/dbscan.rs | 22 ++- src/aggregation/dbscan/denseframe_dbscan.rs | 28 +++- src/aggregation/dbscan/runner.rs | 168 ++++++++++---------- src/aggregation/tracing.rs | 104 +++++++----- 5 files changed, 197 insertions(+), 134 deletions(-) diff --git a/README.md b/README.md index 3dc5b37..5660599 100644 --- a/README.md +++ b/README.md @@ -89,12 +89,11 @@ RAYON_NUM_THREADS=4 # will set the number of threads to use in rayon 4. Add targeted extraction. 5. Add detection of MS1 features + notched search instead of wide window search. 6. Clean up some of the features and decide what aggregation steps use interal paralellism. (in some steps making multiple aggregations in paralle is better than doing parallel operations within the aggregation). - - Fix nomenclature ... I dont like how it is not consistent (indexed, indexer, index are using interchangeably ...). 7. Compilation warning cleanup. 8. Clean up dead/commented out code. 9. Refactor `max_extension_distances` argument in the generic dbscan implementation to prevent the errors that might arise from mixing up the dimensions. - Should that be a propoerty of the converter? -10. Commit to f32/f64 in specific places ... instead of the harder to maintain generic types. + - There is no longer a converter there ... so nope 11. Add CICD to distribute the pre-compiled binaries. 12. Add semver checks to the CICD pipeline. 13. Add IMS output to the sage report. @@ -107,4 +106,8 @@ RAYON_NUM_THREADS=4 # will set the number of threads to use in rayon ## Where are we at? -- Ids are pretty close to the equivalent DDA runs with the correct parameters ... They do seem good via manual inspection but the number of ids is low compared to peptide-centric searches. +- Ids are pretty close to the equivalent DDA runs with the correct parameters ... 
+ - Emphasis in right parameters ... they do seem to vary quite a bit depending on + the intrument/method/sample. I am working on improve this. +- Spectra quality does seem good via manual inspection but the number of ids + is low compared to peptide-centric searches. diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index 3bc2cd1..fe5eab9 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,12 +1,15 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; -use crate::space::space_generics::{HasIntensity, NDPointConverter, QueriableIndexedPoints}; +use crate::space::space_generics::{ + convert_to_bounds_query, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPointConverter, + QueriableIndexedPoints, +}; use crate::utils; use log::{debug, info, trace}; use rayon::prelude::*; use std::ops::Add; -use crate::aggregation::dbscan::runner::_dbscan; +use crate::aggregation::dbscan::runner::dbscan_label_clusters; // Pretty simple function ... it uses every passed centroid, converts it to a point // and generates a new centroid that aggregates all the points in its range. 
@@ -35,7 +38,7 @@ fn reassign_centroid< for centroid in centroids { let query_point = centroid_converter.convert(¢roid); - let mut query_elems = centroid_converter.convert_to_bounds_query(&query_point); + let mut query_elems = convert_to_bounds_query(&query_point); query_elems.0.expand(expansion_factors); // trace!("Querying for Centroid: {:?}", query_elems.1); @@ -66,6 +69,7 @@ pub fn dbscan_generic< G: Sync + Send + ClusterAggregator, T: HasIntensity + Send + Clone + Copy + Sync, F: Fn() -> G + Send + Sync, + D: Send + Sync, const N: usize, >( converter: C, @@ -73,12 +77,15 @@ pub fn dbscan_generic< min_n: usize, min_intensity: u64, def_aggregator: F, - extra_filter_fun: Option<&(dyn Fn(&T, &T) -> bool + Send + Sync)>, + extra_filter_fun: Option<&(dyn Fn(&D) -> bool + Send + Sync)>, log_level: Option, keep_unclustered: bool, max_extension_distances: &[f32; N], back_converter: Option, -) -> Vec { +) -> Vec +where + Vec: IntenseAtIndex + DistantAtIndex, +{ let show_progress = log_level.is_some(); let log_level = match log_level { Some(x) => x, @@ -110,15 +117,14 @@ pub fn dbscan_generic< i_timer.stop(true); let mut i_timer = timer.start_sub_timer("dbscan"); - let cluster_labels = _dbscan( + let cluster_labels = dbscan_label_clusters( &tree, &prefiltered_peaks, - &ndpoints, + ndpoints.as_slice(), min_n, min_intensity, &intensity_sorted_indices, extra_filter_fun, - converter, show_progress, max_extension_distances, ); diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 501bcc8..0e31eda 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -2,6 +2,7 @@ use crate::aggregation::aggregators::TimsPeakAggregator; use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::{DenseFrame, TimsPeak}; +use crate::space::space_generics::{DistantAtIndex, 
IntenseAtIndex}; use crate::utils::within_distance_apply; // bool> @@ -50,7 +51,7 @@ pub fn dbscan_denseframe( min_n, min_intensity, TimsPeakAggregator::default, - None::<&(dyn Fn(&TimsPeak, &TimsPeak) -> bool + Send + Sync)>, + None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, None, true, &[max_mz_extension as f32, max_ims_extension], @@ -65,3 +66,28 @@ pub fn dbscan_denseframe( sorted: None, } } + +impl IntenseAtIndex for Vec { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self[index].intensity as u64 + } +} + +impl DistantAtIndex for Vec { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + panic!("I dont think this is called ever ..."); + let mut sum = 0.0; + let diff_mz = (self[index].mz - self[other].mz) as f32; + sum += diff_mz * diff_mz; + let diff_ims = self[index].mobility - self[other].mobility; + sum += diff_ims * diff_ims; + sum.sqrt() + } +} diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 79d9522..9bd02e6 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -4,6 +4,7 @@ use crate::space::space_generics::{ NDPoint, QueriableIndexedPoints, }; use std::marker::PhantomData; +use std::ops::Index; use crate::utils; use indicatif::ProgressIterator; @@ -216,17 +217,17 @@ struct DBSCANRunner<'a, const N: usize, D> { max_extension_distances: &'a [f32; N], } -struct DBSCANPoints<'a, const N: usize, PP, QP, D, E> +struct DBSCANPoints<'a, const N: usize, PP, PE, DAI, E> where PP: IntenseAtIndex + std::marker::Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - D: DistantAtIndex, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + ?Sized, { - prefiltered_peaks: &'a PP, // &'a Vec, + raw_elements: &'a PP, // &'a Vec, intensity_sorted_indices: &'a Vec<(usize, u64)>, indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), - quad_points: &'a QP, // [NDPoint], - dist: D, + projected_elements: &'a PE, // [NDPoint], + 
raw_dist: &'a DAI, _phantom_metric: PhantomData, } @@ -234,20 +235,20 @@ impl<'a, const N: usize, PP, QQ, D, E> DBSCANPoints<'a, N, PP, QQ, D, E> where PP: IntenseAtIndex + std::marker::Send + ?Sized, QQ: AsNDPointsAtIndex + ?Sized, - D: DistantAtIndex, + D: DistantAtIndex + ?Sized, { fn get_intensity_at_index( &self, index: usize, ) -> u64 { - self.prefiltered_peaks.intensity_at_index(index) + self.raw_elements.intensity_at_index(index) } fn get_ndpoint_at_index( &self, index: usize, ) -> NDPoint { - self.quad_points.get_ndpoint(index) + self.projected_elements.get_ndpoint(index) } fn get_distance_at_indices( @@ -255,7 +256,7 @@ where a: usize, b: usize, ) -> E { - self.dist.distance_at_indices(a, b) + self.raw_dist.distance_at_indices(a, b) } } @@ -263,23 +264,23 @@ impl<'a, 'b: 'a, const N: usize, D> DBSCANRunner<'a, N, D> where D: Sync, { - fn run( + fn run( &self, - prefiltered_peaks: &'b PP, // Vec, // trait impl Index + raw_elements: &'b PP, // Vec, // trait impl Index intensity_sorted_indices: &'b Vec<(usize, u64)>, indexed_points: &'b (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), - quad_points: &'b QP, //[NDPoint], // trait impl AsNDPointAtIndex> - distance_calculator: DAI, + projected_elements: &'b PE, //[NDPoint], // trait impl AsNDPointAtIndex> + raw_distance_calculator: &'b DAI, ) -> ClusterLabels where PP: IntenseAtIndex + Send + Sync + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { let usize_filterfun = match self.filter_fun { Some(filterfun) => { let cl = |a: &usize, b: &usize| { - filterfun(&distance_calculator.distance_at_indices(*a, *b)) + filterfun(&raw_distance_calculator.distance_at_indices(*a, *b)) }; let bind = Some(cl); bind @@ -289,12 +290,12 @@ where let mut state = DBSCANRunnerState::new(intensity_sorted_indices.len(), usize_filterfun); - let points: DBSCANPoints = DBSCANPoints { - prefiltered_peaks, + let 
points: DBSCANPoints = DBSCANPoints { + raw_elements, intensity_sorted_indices, indexed_points, - quad_points, - dist: distance_calculator, + projected_elements, + raw_dist: raw_distance_calculator, _phantom_metric: PhantomData, }; // Q: if filter fun is required ... why is it an option? @@ -320,15 +321,15 @@ where state.cluster_labels } - fn process_points( + fn process_points( &self, mut state: DBSCANRunnerState, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, ) -> DBSCANRunnerState where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { let my_progbar = state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); @@ -351,18 +352,18 @@ where } /// This method gets applied to every point in decreasing intensity order. - fn process_single_point( + fn process_single_point( &self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { return; @@ -375,7 +376,7 @@ where timers, cc_metrics, ); - if !self.is_core_point(&neighbors, points.prefiltered_peaks, timers) { + if !self.is_core_point(&neighbors, points.raw_elements, timers) { cluster_labels.set_noise(point_index); return; } @@ -390,21 +391,21 @@ where ); } - fn find_main_loop_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, filter_fun_cache: &mut 
Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { timers.outer_loop_nn_timer.reset_start(); - let binding = points.quad_points.get_ndpoint(point_index); + let binding = points.projected_elements.get_ndpoint(point_index); let query_elems = convert_to_bounds_query(&binding); let mut candidate_neighbors = points .indexed_points @@ -449,7 +450,7 @@ where fn is_core_point( &self, neighbors: &[usize], - prefiltered_peaks: &'a PP, + raw_elements: &'a PP, timers: &mut DBScanTimers, ) -> bool where @@ -458,24 +459,24 @@ where timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() - .map(|i| prefiltered_peaks.intensity_at_index(*i)) + .map(|i| raw_elements.intensity_at_index(*i)) .sum::(); timers.outer_intensity_calculation.stop(false); return neighbor_intensity_total >= self.min_intensity; } - fn main_loop_expand_cluster( + fn main_loop_expand_cluster( &self, apex_point_index: usize, neighbors: Vec, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { cluster_labels.set_new_cluster(apex_point_index); let mut seed_set: Vec = neighbors; @@ -518,19 +519,19 @@ where } } - fn find_local_neighbors( + fn find_local_neighbors( &self, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + 
Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { timers.inner_loop_nn_timer.reset_start(); - let binding = points.quad_points.get_ndpoint(neighbor_index); + let binding = points.projected_elements.get_ndpoint(neighbor_index); let inner_query_elems = convert_to_bounds_query(&binding); let local_neighbors: Vec = points .indexed_points @@ -542,20 +543,20 @@ where local_neighbors } - fn filter_neighbors_inner_loop( + fn filter_neighbors_inner_loop( &self, local_neighbors: Vec, cluster_apex_point_index: usize, current_center_point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, cluster_labels: &ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { let filtered = self.apply_filter_fun( local_neighbors, @@ -579,47 +580,47 @@ where ) } - fn filter_by_apex_distance( + fn filter_by_apex_distance( &self, mut neighbors: Vec, apex_point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { timers.local_neighbor_filter_timer.reset_start(); - let query_point = &points.quad_points.get_ndpoint(apex_point_index); + let query_point = &points.projected_elements.get_ndpoint(apex_point_index); neighbors.retain(|&i| { - self.is_within_max_distance(&points.quad_points.get_ndpoint(i), query_point) + self.is_within_max_distance(&points.projected_elements.get_ndpoint(i), query_point) }); timers.local_neighbor_filter_timer.stop(false); neighbors } - fn is_extension_core_point( + fn 
is_extension_core_point( &self, neighbors: &[usize], current_center_point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, timers: &mut DBScanTimers, ) -> bool where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors .iter() - .map(|&i| points.prefiltered_peaks.intensity_at_index(i)) + .map(|&i| points.raw_elements.intensity_at_index(i)) .sum(); neighbor_intensity_total += points - .prefiltered_peaks + .raw_elements .intensity_at_index(current_center_point_index); timers.inner_intensity_calculation.stop(false); @@ -634,17 +635,17 @@ where /// one could pass a function that checks if the chromatograms a high correlation. /// Because two might share the same point in space, intensity is not really /// relevant but co-elution might be critical. 
- fn apply_filter_fun( + fn apply_filter_fun( &self, local_neighbors: Vec, point_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, filter_fun_cache: &mut Option, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { if let Some(cache) = filter_fun_cache { local_neighbors @@ -673,26 +674,26 @@ where neighbors } - fn filter_by_local_intensity_and_distance( + fn filter_by_local_intensity_and_distance( &self, mut neighbors: Vec, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, QP, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, - QP: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync, + PE: AsNDPointsAtIndex + ?Sized, + DAI: DistantAtIndex + Send + Sync + ?Sized, { timers.local_neighbor_filter_timer.reset_start(); - let query_intensity = points.prefiltered_peaks.intensity_at_index(neighbor_index); - let query_point = &points.quad_points.get_ndpoint(neighbor_index); + let query_intensity = points.raw_elements.intensity_at_index(neighbor_index); + let query_point = &points.projected_elements.get_ndpoint(neighbor_index); neighbors.retain(|&i| { - let going_downhill = points.prefiltered_peaks.intensity_at_index(i) <= query_intensity; + let going_downhill = points.raw_elements.intensity_at_index(i) <= query_intensity; let within_distance = - self.is_within_max_distance(&points.quad_points.get_ndpoint(i), query_point); + self.is_within_max_distance(&points.projected_elements.get_ndpoint(i), query_point); going_downhill && within_distance }); @@ -713,17 +714,18 @@ where } } -pub fn _dbscan< +pub fn dbscan_label_clusters< 'a, const N: usize, - C: NDPointConverter, - E: Sync + Copy + HasIntensity, - T: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, + RE: 
IntenseAtIndex + DistantAtIndex + Send + Sync + Index + ?Sized, + T: QueriableIndexedPoints<'a, N, usize> + Send + std::marker::Sync, + PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, + E: HasIntensity + Send + Sync, >( indexed_points: &'a T, - prefiltered_peaks: &'a [E], - quad_points: &'a [NDPoint], + raw_elements: &'a RE, + projected_elements: &'a PE, // [NDPoint], min_n: usize, min_intensity: u64, intensity_sorted_indices: &'a Vec<(usize, u64)>, @@ -740,11 +742,11 @@ pub fn _dbscan< }; let cluster_labels = runner.run( - prefiltered_peaks, + raw_elements, intensity_sorted_indices, indexed_points, - quad_points, - indexed_points, + projected_elements, + raw_elements, ); cluster_labels diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 597f36e..d2dcf9d 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -4,8 +4,10 @@ use crate::aggregation::chromatograms::{ }; use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::DenseFrameWindow; -use crate::space::space_generics::NDBoundary; -use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter, TraceLike}; +use crate::space::space_generics::{ + DistantAtIndex, HasIntensity, NDPoint, NDPointConverter, TraceLike, +}; +use crate::space::space_generics::{IntenseAtIndex, NDBoundary}; use crate::utils; use crate::utils::RollingSDCalculator; @@ -429,6 +431,25 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec>() } +impl IntenseAtIndex for Vec { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self[index].intensity + } +} + +impl DistantAtIndex for Vec { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + panic!("I dont think this is called ever ..."); + } +} + // Needed to specify the generic in dbscan_generic type FFTimeTimsPeak = fn(&TimeTimsPeak, &TimeTimsPeak) -> bool; @@ -478,7 +499,7 @@ fn _combine_single_window_traces( quad_low_high: window_quad_low_high, 
btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), }, - None::<&(dyn Fn(&TimeTimsPeak, &TimeTimsPeak) -> bool + Send + Sync)>, + None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, None, false, &max_extension_distances, @@ -624,30 +645,6 @@ impl NDPointConverter for BaseTraceConverter { ], } } - - fn convert_to_bounds_query<'a>( - &self, - point: &'a NDPoint<3>, - ) -> (NDBoundary<3>, Option<&'a NDPoint<3>>) { - const NUM_DIMENTIONS: usize = 3; - // let range_center = (point.values[1] + point.values[2]) / 2.; - let mut starts = point.values; - let mut ends = point.values; - for i in 0..NUM_DIMENTIONS { - starts[i] -= 1.; - ends[i] += 1.; - } - - // // KEY = [-------] - // // Allowed ends = [------] - // // Allowed starts = [------] - - // ends[1] = range_center; - // starts[2] = range_center; - - let bounds = NDBoundary::new(starts, ends); - (bounds, Some(point)) - } } struct PseudoScanBackConverter { @@ -699,6 +696,42 @@ impl Default for PseudoscanGenerationConfig { } } +impl IntenseAtIndex for Vec { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self[index].intensity + } +} + +struct BaseTraceDistance { + quad_diff: f32, + iou: f32, + cosine: f32, +} + +impl DistantAtIndex for Vec { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> BaseTraceDistance { + let quad_diff = (self[index].quad_center - self[other].quad_center).abs(); + let iou = self[index].rt_iou(&self[other]); + // Q: What can cause an error here?? 
+ let cosine = self[index] + .chromatogram + .cosine_similarity(&self[other].chromatogram) + .unwrap(); + BaseTraceDistance { + quad_diff, + iou, + cosine, + } + } +} + pub fn combine_pseudospectra( traces: Vec, config: PseudoscanGenerationConfig, @@ -716,19 +749,12 @@ pub fn combine_pseudospectra( const IOU_THRESH: f32 = 0.1; const COSINE_THRESH: f32 = 0.8; - let extra_filter_fun = |x: &BaseTrace, y: &BaseTrace| { - let close_in_quad = (x.quad_center - y.quad_center).abs() < 5.0; - if !close_in_quad { - return false; - } - - let iou = x.rt_iou(y); - let within_iou_tolerance = iou > IOU_THRESH; - - let cosine = x.chromatogram.cosine_similarity(&y.chromatogram).unwrap(); - let within_cosine_tolerance = cosine > COSINE_THRESH; + let extra_filter_fun = |x: &BaseTraceDistance| { + let close_in_quad = (x.quad_diff).abs() < 5.0; + let within_iou_tolerance = x.iou > IOU_THRESH; + let within_cosine_tolerance = x.cosine > COSINE_THRESH; - within_iou_tolerance && within_cosine_tolerance + return close_in_quad && within_iou_tolerance && within_cosine_tolerance; }; let back_converter = PseudoScanBackConverter { From fbd19a4ac501baa567dbd1704c6fd9ca814b3d80 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 19 Jul 2024 14:09:47 -0700 Subject: [PATCH 17/26] (wip) parcial implementation of the actual frame slice window denoising --- src/aggregation/aggregators.rs | 12 +- src/aggregation/dbscan/dbscan.rs | 121 ++++- src/aggregation/dbscan/denseframe_dbscan.rs | 2 +- src/aggregation/dbscan/runner.rs | 2 +- src/aggregation/ms_denoise.rs | 337 ++++++++++++- src/aggregation/tracing.rs | 4 +- src/ms/frames/dense_frame_window.rs | 213 ++++++++ src/ms/{frames.rs => frames/frame_slice.rs} | 506 ++++++-------------- src/ms/frames/frames.rs | 76 +++ src/ms/frames/mod.rs | 6 + src/space/space_generics.rs | 14 +- 11 files changed, 884 insertions(+), 409 deletions(-) create mode 100644 src/ms/frames/dense_frame_window.rs rename src/ms/{frames.rs => frames/frame_slice.rs} (71%) create mode 100644 src/ms/frames/frames.rs create mode 100644 src/ms/frames/mod.rs diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index f5c2413..e6ee845 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,6 +1,7 @@ use crate::ms::frames::TimsPeak; -use crate::space::space_generics::HasIntensity; +use crate::space::space_generics::{HasIntensity, IntenseAtIndex}; use crate::utils; +use std::ops::Index; use rayon::prelude::*; @@ -80,13 +81,14 @@ impl ClusterAggregator for TimsPeakAggregator { pub fn aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, log_level: utils::LogLevel, keep_unclustered: bool, @@ -123,13 +125,14 @@ pub fn aggregate_clusters< fn parallel_aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, log_level: 
utils::LogLevel, keep_unclustered: bool, @@ -211,13 +214,14 @@ fn parallel_aggregate_clusters< fn serial_aggregate_clusters< T: Send + Clone + Copy, + RE: Index + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, >( tot_clusters: u64, cluster_labels: Vec>, - elements: &[T], + elements: &RE, def_aggregator: &F, keep_unclustered: bool, ) -> Vec { diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index fe5eab9..8ab3eb9 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,13 +1,13 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; use crate::space::space_generics::{ - convert_to_bounds_query, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPointConverter, - QueriableIndexedPoints, + convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, + NDPoint, NDPointConverter, QueriableIndexedPoints, }; -use crate::utils; +use crate::utils::{self, ContextTimer}; use log::{debug, info, trace}; use rayon::prelude::*; -use std::ops::Add; +use std::ops::{Add, Index}; use crate::aggregation::dbscan::runner::dbscan_label_clusters; @@ -23,12 +23,13 @@ fn reassign_centroid< I: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, G: Sync + Send + ClusterAggregator, R: Send, + RE: Send + Sync + Index + ?Sized, F: Fn() -> G + Send + Sync, >( centroids: Vec, indexed_points: &'a I, centroid_converter: C, - elements: &[T], + elements: &RE, def_aggregator: F, log_level: utils::LogLevel, expansion_factors: &[f32; N], @@ -62,18 +63,38 @@ fn reassign_centroid< // TODO: rename prefiltered peaks argument! // TODO implement a version that takes a sparse distance matrix. 
+impl AsNDPointsAtIndex for Vec> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint { + self[index] + } + + fn num_ndpoints(&self) -> usize { + self.len() + } +} + pub fn dbscan_generic< C: NDPointConverter, C2: NDPointConverter, R: Send, G: Sync + Send + ClusterAggregator, T: HasIntensity + Send + Clone + Copy + Sync, + RE: IntenseAtIndex + + DistantAtIndex + + IntoIterator + + Send + + Sync + + Index + + ?Sized, F: Fn() -> G + Send + Sync, D: Send + Sync, const N: usize, >( converter: C, - prefiltered_peaks: Vec, + prefiltered_peaks: &RE, min_n: usize, min_intensity: u64, def_aggregator: F, @@ -84,7 +105,7 @@ pub fn dbscan_generic< back_converter: Option, ) -> Vec where - Vec: IntenseAtIndex + DistantAtIndex, + ::IntoIter: ExactSizeIterator, { let show_progress = log_level.is_some(); let log_level = match log_level { @@ -94,7 +115,7 @@ where let timer = utils::ContextTimer::new("dbscan_generic", true, log_level); let mut i_timer = timer.start_sub_timer("conversion"); - let (ndpoints, boundary) = converter.convert_vec(&prefiltered_peaks); + let (ndpoints, boundary) = converter.convert_iter(prefiltered_peaks.into_iter()); i_timer.stop(true); let mut i_timer = timer.start_sub_timer("tree"); @@ -106,9 +127,69 @@ where } i_timer.stop(true); + let centroids = dbscan_aggregate( + prefiltered_peaks, + &ndpoints, + &tree, + timer, + min_n, + min_intensity, + def_aggregator, + extra_filter_fun, + log_level, + keep_unclustered, + max_extension_distances, + show_progress, + ); + + match back_converter { + Some(bc) => reassign_centroid( + centroids, + &tree, + bc, + prefiltered_peaks, + &def_aggregator, + log_level, + max_extension_distances, + ), + None => centroids, + } +} + +pub fn dbscan_aggregate< + 'a, + const N: usize, + RE: IntenseAtIndex + + DistantAtIndex + + IntoIterator + + Send + + Sync + + Index + + ?Sized, + IND: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync + Send, + NAI: AsNDPointsAtIndex + std::marker::Sync + Send, + T: HasIntensity + 
Send + Clone + Copy + Sync, + D: Send + Sync, + G: Sync + Send + ClusterAggregator, + R: Send, + F: Fn() -> G + Send + Sync, +>( + prefiltered_peaks: &RE, + ndpoints: &NAI, + index: &IND, + timer: ContextTimer, + min_n: usize, + min_intensity: u64, + def_aggregator: F, + extra_filter_fun: Option<&(dyn Fn(&D) -> bool + Send + Sync)>, + log_level: utils::LogLevel, + keep_unclustered: bool, + max_extension_distances: &[f32; N], + show_progress: bool, +) -> Vec { let mut i_timer = timer.start_sub_timer("pre-sort"); let mut intensity_sorted_indices = prefiltered_peaks - .iter() + .into_iter() .enumerate() .map(|(i, peak)| (i, peak.intensity())) .collect::>(); @@ -118,9 +199,9 @@ where let mut i_timer = timer.start_sub_timer("dbscan"); let cluster_labels = dbscan_label_clusters( - &tree, - &prefiltered_peaks, - ndpoints.as_slice(), + index, + prefiltered_peaks, + ndpoints, min_n, min_intensity, &intensity_sorted_indices, @@ -133,22 +214,10 @@ where let centroids = aggregate_clusters( cluster_labels.num_clusters, cluster_labels.cluster_labels, - &prefiltered_peaks, + prefiltered_peaks, &def_aggregator, log_level, keep_unclustered, ); - - match back_converter { - Some(bc) => reassign_centroid( - centroids, - &tree, - bc, - &prefiltered_peaks, - &def_aggregator, - log_level, - max_extension_distances, - ), - None => centroids, - } + centroids } diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 0e31eda..3d5f83f 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -47,7 +47,7 @@ pub fn dbscan_denseframe( }; let peak_vec: Vec = dbscan_generic( converter, - prefiltered_peaks, + &prefiltered_peaks, min_n, min_intensity, TimsPeakAggregator::default, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 9bd02e6..b266842 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -721,7 +721,7 @@ pub fn 
dbscan_label_clusters< T: QueriableIndexedPoints<'a, N, usize> + Send + std::marker::Sync, PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, - E: HasIntensity + Send + Sync, + E: Send + Sync, >( indexed_points: &'a T, raw_elements: &'a RE, diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 3cea28f..8d6cc5f 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,13 +1,23 @@ use core::panic; +use std::ops::Index; +use crate::aggregation::dbscan::dbscan::dbscan_aggregate; use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; +use crate::ms::frames::frames::RawTimsPeak; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; use crate::ms::frames::FrameSlice; +use crate::ms::frames::TimsPeak; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; +use crate::space::space_generics::AsNDPointsAtIndex; +use crate::space::space_generics::DistantAtIndex; +use crate::space::space_generics::IntenseAtIndex; +use crate::space::space_generics::NDPoint; +use crate::space::space_generics::QueriableIndexedPoints; use crate::utils; +use timsrust::ConvertableIndex; use indicatif::ParallelProgressIterator; use log::{info, trace, warn}; @@ -15,6 +25,11 @@ use rayon::prelude::*; use serde::{Deserialize, Serialize}; use timsrust::Frame; +use super::aggregators::aggregate_clusters; +use super::aggregators::ClusterAggregator; +use super::aggregators::TimsPeakAggregator; +use super::dbscan::runner::dbscan_label_clusters; + // TODO I can probably split the ms1 and ms2 ... 
#[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct DenoiseConfig { @@ -135,6 +150,319 @@ fn _denoise_denseframe( denoised_frame } +#[derive(Debug)] +struct FrameSliceWindow<'a> { + window: &'a [FrameSlice<'a>], + reference_index: usize, + cum_lengths: Vec, +} + +#[derive(Debug, Clone, Copy)] +struct MaybeIntenseRawPeak { + intensity: u32, + tof_index: u32, + scan_index: usize, + weight_only: bool, +} + +impl FrameSliceWindow<'_> { + fn new<'a>(window: &'a [FrameSlice<'a>]) -> FrameSliceWindow<'a> { + let cum_lengths = window + .iter() + .map(|x| x.num_ndpoints()) + .scan(0, |acc, x| { + *acc += x; + Some(*acc) + }) + .collect(); + FrameSliceWindow { + window, + reference_index: window.len() / 2, + cum_lengths, + } + } + fn get_window_index( + &self, + index: usize, + ) -> (usize, usize) { + let mut pos = 0; + for (i, cum_length) in self.cum_lengths.iter().enumerate() { + if index < *cum_length { + pos = i; + break; + } + } + let within_window_index = index - self.cum_lengths[pos]; + (pos, within_window_index) + } +} + +impl Index for FrameSliceWindow<'_> { + type Output = MaybeIntenseRawPeak; + + fn index( + &self, + index: usize, + ) -> &Self::Output { + let (pos, within_window_index) = self.get_window_index(index); + let tmp = self.window[pos]; + let (tof, int) = tmp.tof_int_at_index(within_window_index); + let foo = MaybeIntenseRawPeak { + intensity: int, + tof_index: tof, + scan_index: tmp.global_scan_at_index(within_window_index), + weight_only: pos != self.reference_index, + }; + &foo + } +} + +impl IntenseAtIndex for FrameSliceWindow<'_> { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + if pos == self.reference_index { + self.window[self.reference_index].intensity_at_index(within_window_index) + } else { + 0 + } + } + + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + 
self.window[pos].weight_at_index(within_window_index) + } +} + +impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSliceWindow<'a> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec<&'a usize> { + let mut out = Vec::new(); + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + let local_outs = frame.query_ndpoint(point); + for ii in local_outs { + out.push(&(ii + cum_length)); + } + } + out + } + + fn query_ndrange( + &'a self, + boundary: &crate::space::space_generics::NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec<&'a usize> { + let mut out = Vec::new(); + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + let local_outs = frame.query_ndrange(boundary, reference_point); + for ii in local_outs { + out.push(&(ii + cum_length)); + } + } + out + } +} + +impl DistantAtIndex for FrameSliceWindow<'_> { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + let (pos, within_window_index) = self.get_window_index(index); + let (pos_other, within_window_index_other) = self.get_window_index(other); + panic!("unimplemented"); + 0. 
+ } +} + +impl AsNDPointsAtIndex<2> for FrameSliceWindow<'_> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<2> { + let (pos, within_window_index) = self.get_window_index(index); + self.window[pos].get_ndpoint(within_window_index) + } + + fn num_ndpoints(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct RawWeightedTimsPeakAggregator { + pub cumulative_weighted_cluster_tof: u64, + pub cumulative_weighted_cluster_scan: u64, + pub cumulative_cluster_weight: u64, + pub cumulative_cluster_intensity: u64, + pub num_peaks: u64, + pub num_intense_peaks: u64, +} + +#[derive(Debug, Clone, Copy)] +struct RawScaleTimsPeak { + intensity: f64, + tof_index: f64, + scan_index: f64, + npeaks: u64, +} + +impl RawScaleTimsPeak { + fn to_timspeak( + &self, + mz_converter: &timsrust::Tof2MzConverter, + ims_converter: &timsrust::Scan2ImConverter, + ) -> TimsPeak { + TimsPeak { + intensity: self.intensity as u32, + mz: mz_converter.convert(self.tof_index), + mobility: ims_converter.convert(self.scan_index) as f32, + npeaks: self.npeaks as u32, + } + } +} + +impl ClusterAggregator for RawWeightedTimsPeakAggregator { + // Calculate the weight-weighted average of the cluster + // for mz and ims. The intensity is kept as is. 
+ fn add( + &mut self, + elem: &MaybeIntenseRawPeak, + ) { + self.cumulative_cluster_intensity += + if elem.weight_only { 0 } else { elem.intensity } as u64; + self.cumulative_cluster_weight += elem.intensity as u64; + self.cumulative_weighted_cluster_tof += elem.tof_index as u64 * elem.intensity as u64; + self.cumulative_weighted_cluster_scan += elem.scan_index as u64 * elem.intensity as u64; + self.num_peaks += 1; + if !elem.weight_only { + self.num_intense_peaks += 1; + }; + } + + fn aggregate(&self) -> RawScaleTimsPeak { + // Use raw + RawScaleTimsPeak { + intensity: self.cumulative_cluster_intensity as f64, + tof_index: self.cumulative_weighted_cluster_tof as f64 + / self.cumulative_cluster_weight as f64, + scan_index: self.cumulative_weighted_cluster_scan as f64 + / self.cumulative_cluster_weight as f64, + npeaks: self.num_intense_peaks, + } + } + + fn combine( + self, + other: Self, + ) -> Self { + Self { + cumulative_weighted_cluster_tof: self.cumulative_weighted_cluster_tof + + other.cumulative_weighted_cluster_tof, + cumulative_weighted_cluster_scan: self.cumulative_weighted_cluster_scan + + other.cumulative_weighted_cluster_scan, + cumulative_cluster_weight: self.cumulative_cluster_weight + + other.cumulative_cluster_weight, + cumulative_cluster_intensity: self.cumulative_cluster_intensity + + other.cumulative_cluster_intensity, + num_peaks: self.num_peaks + other.num_peaks, + num_intense_peaks: self.num_intense_peaks + other.num_intense_peaks, + } + } +} + +fn denoise_frame_slice_window( + frameslice_window: &[FrameSlice], + ims_converter: &timsrust::Scan2ImConverter, + mz_converter: &timsrust::Tof2MzConverter, + dia_frame_info: &DIAFrameInfo, + min_n: usize, + min_intensity: u64, + mz_scaling: f64, + max_mz_extension: f64, + ims_scaling: f32, + max_ims_extension: f32, +) -> DenseFrameWindow { + let timer = utils::ContextTimer::new("dbscan_dfs", true, utils::LogLevel::TRACE); + let fsw = FrameSliceWindow::new(frameslice_window); + // dbscan_aggregate( + 
// &fsw, + // &fsw, + // &fsw, + // timer, + // min_n, + // min_intensity, + // TimsPeakAggregator::default, + // None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, + // utils::LogLevel::TRACE, + // false, + // &[max_mz_extension as f32, max_ims_extension], + // false, + // ); + + let mut intensity_sorted_indices = frameslice_window + .iter() + .map(|x| x.intensities) + .flat_map(|x| x) + .enumerate() + .map(|(i, x)| (i, *x as u64)) + .collect::>(); + + intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + + let mut i_timer = timer.start_sub_timer("dbscan"); + let cluster_labels = dbscan_label_clusters( + &fsw, + &fsw, + &fsw, + min_n, + min_intensity, + &intensity_sorted_indices, + None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, + false, + &[10., 100.], + ); + i_timer.stop(true); + + let centroids = aggregate_clusters( + cluster_labels.num_clusters, + cluster_labels.cluster_labels, + &fsw, + &RawWeightedTimsPeakAggregator::default, + utils::LogLevel::TRACE, + false, + ); + + let out = DenseFrameWindow { + frame: DenseFrame { + raw_peaks: centroids + .into_iter() + .map(|x| x.to_timspeak(mz_converter, ims_converter)) + .collect(), + index: 0, + rt: 0., + frame_type: timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), + sorted: None, + }, + ims_min: 0., + ims_max: 0., + mz_start: 0., + mz_end: 0., + group_id: 0, + quad_group_id: 0, + }; + + out +} + fn denoise_frame_slice( frame_window: &FrameSlice, ims_converter: &timsrust::Scan2ImConverter, @@ -290,11 +618,12 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> info!("Denoising window {}/{}", i + 1, num_windows); let progbar = indicatif::ProgressBar::new(sv.len() as u64); let denoised_elements: Vec = sv - .into_par_iter() + .as_slice() + .par_windows(3) .progress_with(progbar) - .map(|x| { - denoise_frame_slice( - &x, + .map(|rt_window_of_slices| { + denoise_frame_slice_window( + rt_window_of_slices, &self.ims_converter, &self.mz_converter, &self.dia_frame_info, diff 
--git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index d2dcf9d..5d505e5 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -486,7 +486,7 @@ fn _combine_single_window_traces( // TODO make dbscan_generic a runner-class let out_traces: Vec = dbscan_generic( converter, - prefiltered_peaks, + &prefiltered_peaks, min_n, min_intensity.into(), || TraceAggregator { @@ -770,7 +770,7 @@ pub fn combine_pseudospectra( let foo: Vec = dbscan_generic( converter, - traces, + &traces, config.min_n.into(), config.min_neighbor_intensity.into(), PseudoSpectrumAggregator::default, diff --git a/src/ms/frames/dense_frame_window.rs b/src/ms/frames/dense_frame_window.rs new file mode 100644 index 0000000..ef763e7 --- /dev/null +++ b/src/ms/frames/dense_frame_window.rs @@ -0,0 +1,213 @@ +use timsrust::{ConvertableIndex, Frame, Scan2ImConverter, Tof2MzConverter}; + +use crate::ms::{ + frames::MsMsFrameSliceWindowInfo, + tdf::{DIAFrameInfo, ScanRange}, +}; + +use super::{frames::SortingOrder, DenseFrame, FrameSlice, TimsPeak}; +use log::info; + +pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); +fn check_peak_sanity(peak: &TimsPeak) { + debug_assert!(peak.intensity > 0); + debug_assert!(peak.mz > 0.); + debug_assert!(peak.mobility > 0.); + debug_assert!(peak.npeaks > 0); +} + +#[derive(Debug, Clone)] +pub struct DenseFrameWindow { + pub frame: DenseFrame, + pub ims_min: f32, + pub ims_max: f32, + pub mz_start: f64, + pub mz_end: f64, + pub group_id: usize, + pub quad_group_id: usize, +} + +impl DenseFrameWindow { + pub fn from_frame_window( + frame_window: &FrameSlice, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + dia_info: &DIAFrameInfo, + ) -> DenseFrameWindow { + let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { + None => { + panic!("No window info") + // This branch points to an error in logic ... 
+ // The window info should always be present in this context. + }, + Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { + // This branch should be easy to implement for things like synchro pasef... + // Some details to iron out though ... + panic!("Not implemented") + }, + Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { + let window_group_id = x.window_group_id; + let ww_quad_group_id = x.within_window_quad_group_id; + let scan_start = frame_window.scan_start; + (window_group_id, ww_quad_group_id, scan_start) + }, + }; + + // NOTE: I am swapping here the 'scan start' to be the `ims_end` because + // the first scans have lower 1/k0 values. + let ims_max = ims_converter.convert(scan_start as u32) as f32; + let ims_min = + ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; + + debug_assert!(ims_max <= ims_min); + + let scan_range: Option<&ScanRange> = + dia_info.get_quad_windows(window_group_id, ww_quad_group_id); + let scan_range = match scan_range { + Some(x) => x, + None => { + panic!( + "No scan range for window_group_id: {}, within_window_quad_group_id: {}", + window_group_id, ww_quad_group_id + ); + }, + }; + + let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + + DenseFrameWindow { + frame, + ims_min, + ims_max, + mz_start: scan_range.iso_low as f64, + mz_end: scan_range.iso_high as f64, + group_id: window_group_id, + quad_group_id: ww_quad_group_id, + } + } +} + +impl DenseFrame { + pub fn from_frame( + frame: &Frame, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + ) -> DenseFrame { + let mut expanded_scan_indices = Vec::with_capacity(frame.tof_indices.len()); + let mut last_scan_offset = frame.scan_offsets[0]; + for (scan_index, index_offset) in frame.scan_offsets[1..].iter().enumerate() { + let num_tofs = index_offset - last_scan_offset; + + let ims = ims_converter.convert(scan_index as u32) as f32; + expanded_scan_indices.extend(vec![ims; num_tofs]); + 
last_scan_offset = *index_offset; + } + + let peaks = expanded_scan_indices + .iter() + .zip(frame.tof_indices.iter()) + .zip(frame.intensities.iter()) + .map(|((scan_index, tof_index), intensity)| TimsPeak { + intensity: *intensity, + mz: mz_converter.convert(*tof_index), + mobility: *scan_index, + npeaks: 1, + }) + .collect::>(); + + if cfg!(debug_assertions) { + for peak in peaks.iter() { + check_peak_sanity(peak); + } + } + + let index = frame.index; + let rt = frame.rt; + let frame_type = frame.frame_type; + + DenseFrame { + raw_peaks: peaks, + index, + rt, + frame_type, + sorted: None, + } + } + + pub fn from_frame_window( + frame_window: &FrameSlice, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, + ) -> DenseFrame { + let mut expanded_scan_indices = Vec::with_capacity(frame_window.tof_indices.len()); + let mut last_scan_offset = frame_window.scan_offsets[0]; + for (scan_index, index_offset) in frame_window.scan_offsets[1..].iter().enumerate() { + let num_tofs = index_offset - last_scan_offset; + let scan_index_use = (scan_index + frame_window.scan_start) as u32; + + let ims = ims_converter.convert(scan_index as f64) as f32; + if ims < 0.0 { + info!("Negative IMS value: {}", ims); + info!("scan_index_use: {}", scan_index_use); + info!("scan_index: {}", scan_index); + info!("frame_window.scan_start: {}", frame_window.scan_start); + } + debug_assert!(ims >= 0.0); + expanded_scan_indices.extend(vec![ims; num_tofs]); + last_scan_offset = *index_offset; + } + debug_assert!(last_scan_offset == frame_window.tof_indices.len()); + + let peaks = expanded_scan_indices + .iter() + .zip(frame_window.tof_indices.iter()) + .zip(frame_window.intensities.iter()) + .map(|((scan_index, tof_index), intensity)| TimsPeak { + intensity: *intensity, + mz: mz_converter.convert(*tof_index), + mobility: *scan_index, + npeaks: 1, + }) + .collect::>(); + + if cfg!(debug_assertions) { + for peak in peaks.iter() { + check_peak_sanity(peak); + } + } + + let index = 
frame_window.parent_frame_index; + let rt = frame_window.rt; + let frame_type = frame_window.frame_type; + + DenseFrame { + raw_peaks: peaks, + index, + rt, + frame_type, + sorted: None, + } + } + + pub fn sort_by_mz(&mut self) { + match self.sorted { + Some(SortingOrder::Mz) => (), + _ => { + self.raw_peaks + .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); + self.sorted = Some(SortingOrder::Mz); + }, + } + } + + pub fn sort_by_mobility(&mut self) { + match self.sorted { + Some(SortingOrder::Mobility) => (), + _ => { + self.raw_peaks + .sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); + self.sorted = Some(SortingOrder::Mobility); + }, + } + } +} diff --git a/src/ms/frames.rs b/src/ms/frames/frame_slice.rs similarity index 71% rename from src/ms/frames.rs rename to src/ms/frames/frame_slice.rs index 3db5c6c..6f8c449 100644 --- a/src/ms/frames.rs +++ b/src/ms/frames/frame_slice.rs @@ -1,73 +1,11 @@ use std::fmt; -use std::ops::Index; -use std::slice::SliceIndex; - -use rand::seq::index; -pub use timsrust::Frame; -pub use timsrust::FrameType; -pub use timsrust::{ - ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, -}; - -use crate::ms::tdf::{DIAFrameInfo, ScanRange}; -use crate::space::space_generics::NDPoint; -use crate::space::space_generics::{AsNDPointsAtIndex, HasIntensity, IntenseAtIndex}; - -use log::info; - -#[derive(Debug, Clone, Copy)] -pub struct TimsPeak { - pub intensity: u32, - pub mz: f64, - pub mobility: f32, - pub npeaks: u32, -} - -impl HasIntensity for TimsPeak { - fn intensity(&self) -> u64 { - self.intensity as u64 - } -} - -#[derive(Debug, Clone, Copy)] -pub struct RawTimsPeak { - pub intensity: u32, - pub tof_index: u32, - pub scan_index: usize, -} +use timsrust::{Frame, FrameType}; -#[derive(Debug, Clone, Copy)] -pub struct RawTimsPeakReference<'a> { - pub intensity: &'a u32, - pub tof_index: &'a u32, - pub scan_index: &'a usize, -} - -impl HasIntensity for RawTimsPeak { - fn 
intensity(&self) -> u64 { - self.intensity as u64 - } -} - -impl<'a> HasIntensity for RawTimsPeakReference<'a> { - fn intensity(&self) -> u64 { - *self.intensity as u64 - } -} +use crate::space::space_generics::{ + AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, +}; -fn _check_peak_sanity(peak: &TimsPeak) { - debug_assert!(peak.intensity > 0); - debug_assert!(peak.mz > 0.); - debug_assert!(peak.mobility > 0.); - debug_assert!(peak.npeaks > 0); -} - -#[derive(Debug, Clone, Copy)] -pub enum SortingOrder { - Mz, - Mobility, - Intensity, -} +use super::FrameMsMsWindowInfo; #[derive(Debug, Clone, Copy)] pub enum ScanNumberType { @@ -121,118 +59,6 @@ impl fmt::Display for ScanOutOfBoundsError { } } -/// Information on the context of a window in a frame. -/// -/// This adds to a frame slice the context of the what isolation was used -/// to generate the frame slice. -#[derive(Debug, Clone)] -pub struct FrameMsMsWindowInfo { - pub mz_start: f32, - pub mz_end: f32, - pub window_group_id: usize, - pub within_window_quad_group_id: usize, - pub global_quad_row_id: usize, -} - -pub trait FramePointTolerance { - fn tof_index_range( - &self, - tof_index: u32, - ) -> (u32, u32); - fn scan_range( - &self, - scan_index: ScanNumberType, - ) -> (ScanNumberType, ScanNumberType); -} - -struct AbsoluteFramePointTolerance { - tof_index_tolerance: u32, - scan_tolerance: usize, -} - -impl FramePointTolerance for AbsoluteFramePointTolerance { - fn tof_index_range( - &self, - tof_index: u32, - ) -> (u32, u32) { - let tof_index_tolerance = self.tof_index_tolerance; - ( - tof_index.saturating_sub(tof_index_tolerance), - tof_index.saturating_add(tof_index_tolerance), - ) - } - - fn scan_range( - &self, - scan_index: ScanNumberType, - ) -> (ScanNumberType, ScanNumberType) { - match scan_index { - ScanNumberType::Global(x) => { - let scan_tolerance = self.scan_tolerance; - ( - ScanNumberType::Global(x.saturating_sub(scan_tolerance)), - ScanNumberType::Global(x + 
scan_tolerance), - ) - }, - ScanNumberType::Local(x) => { - let scan_tolerance = self.scan_tolerance; - ( - ScanNumberType::Local(x.saturating_sub(scan_tolerance)), - ScanNumberType::Local(x + scan_tolerance), - ) - }, - } - } -} - -type Range = (usize, usize); - -pub struct RangeSet { - ranges: Vec, - offset: usize, -} - -impl RangeSet { - fn extend( - &mut self, - other: RangeSet, - ) { - let new_offset = self.offset.min(other.offset); - let vs_self_offset = self.offset - new_offset; - let vs_other_offset = other.offset - new_offset; - - for item in self.ranges.iter_mut() { - item.0 += vs_self_offset; - item.1 += vs_self_offset; - } - - for item in other.ranges.iter() { - self.ranges - .push((item.0 + vs_other_offset, item.1 + vs_other_offset)); - } - - self.ranges.sort_unstable_by(|a, b| a.0.cmp(&b.0)); - } - - fn any_overlap(&self) -> bool { - let mut last_end = 0; - - for range in self.ranges.iter() { - if range.0 < last_end { - return true; - } - last_end = range.1; - } - false - } -} - -#[derive(Debug, Clone)] -pub enum MsMsFrameSliceWindowInfo { - WindowGroup(usize), - SingleWindow(FrameMsMsWindowInfo), -} - /// Unprocessed data from a 'Frame' after breaking by quad isolation_window + ims window. /// /// 1. every tof-index + intensity represents a peak. 
@@ -531,6 +357,13 @@ impl<'a> FrameSlice<'a> { Some(ranges) } } + + pub fn tof_int_at_index( + &self, + index: usize, + ) -> (u32, u32) { + (self.tof_indices[index], self.intensities[index]) + } } // Tests for the FrameSlice @@ -757,17 +590,16 @@ impl<'a> IntenseAtIndex for FrameSlice<'a> { // } } -impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { +impl<'a> AsNDPointsAtIndex<2> for FrameSlice<'a> { fn get_ndpoint( &self, index: usize, - ) -> NDPoint<3> { - let intensity = self.intensities[index]; + ) -> NDPoint<2> { let tof_index = self.tof_indices[index]; let scan_index = self.global_scan_at_index(index); NDPoint { - values: [tof_index as f32, scan_index as f32, intensity as f32], + values: [tof_index as f32, scan_index as f32], } } @@ -776,209 +608,155 @@ impl<'a> AsNDPointsAtIndex<3> for FrameSlice<'a> { } } -#[derive(Debug, Clone)] -pub struct DenseFrame { - pub raw_peaks: Vec, - pub index: usize, - pub rt: f64, - pub frame_type: FrameType, - pub sorted: Option, -} - -#[derive(Debug, Clone)] -pub struct DenseFrameWindow { - pub frame: DenseFrame, - pub ims_min: f32, - pub ims_max: f32, - pub mz_start: f64, - pub mz_end: f64, - pub group_id: usize, - pub quad_group_id: usize, -} - -impl DenseFrameWindow { - pub fn from_frame_window( - frame_window: &FrameSlice, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - dia_info: &DIAFrameInfo, - ) -> DenseFrameWindow { - let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { - None => { - panic!("No window info") - // This branch points to an error in logic ... - // The window info should always be present in this context. - }, - Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { - // This branch should be easy to implement for things like synchro pasef... - // Some details to iron out though ... 
- panic!("Not implemented") - }, - Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { - let window_group_id = x.window_group_id; - let ww_quad_group_id = x.within_window_quad_group_id; - let scan_start = frame_window.scan_start; - (window_group_id, ww_quad_group_id, scan_start) - }, - }; - - // NOTE: I am swapping here the 'scan start' to be the `ims_end` because - // the first scans have lower 1/k0 values. - let ims_max = ims_converter.convert(scan_start as u32) as f32; - let ims_min = - ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; - - debug_assert!(ims_max <= ims_min); - - let scan_range: Option<&ScanRange> = - dia_info.get_quad_windows(window_group_id, ww_quad_group_id); - let scan_range = match scan_range { - Some(x) => x, - None => { - panic!( - "No scan range for window_group_id: {}, within_window_quad_group_id: {}", - window_group_id, ww_quad_group_id - ); +impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec<&'a usize> { + let tof_index = point.values[0] as i32; + let scan_index = point.values[1] as usize; + let rangesets = self.matching_rangeset( + tof_index, + ScanNumberType::Global(scan_index), + &AbsoluteFramePointTolerance { + tof_index_tolerance: 1, + scan_tolerance: 1, }, - }; - - let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); + ); - DenseFrameWindow { - frame, - ims_min, - ims_max, - mz_start: scan_range.iso_low as f64, - mz_end: scan_range.iso_high as f64, - group_id: window_group_id, - quad_group_id: ww_quad_group_id, + let mut out = Vec::new(); + if let Some(rangesets) = rangesets { + for range in rangesets.ranges.iter() { + for i in range.0..range.1 { + out.push(&i); + } + } } + out } -} -impl DenseFrame { - pub fn from_frame( - frame: &Frame, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - ) -> DenseFrame { - let mut expanded_scan_indices = 
Vec::with_capacity(frame.tof_indices.len()); - let mut last_scan_offset = frame.scan_offsets[0]; - for (scan_index, index_offset) in frame.scan_offsets[1..].iter().enumerate() { - let num_tofs = index_offset - last_scan_offset; - - let ims = ims_converter.convert(scan_index as u32) as f32; - expanded_scan_indices.extend(vec![ims; num_tofs]); - last_scan_offset = *index_offset; + fn query_ndrange( + &'a self, + boundary: &NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec<&'a usize> { + let tol = AbsoluteFramePointTolerance { + tof_index_tolerance: (boundary.widths[0] / 2.) as u32, + scan_tolerance: (boundary.widths[1] / 2.) as usize, + }; + let rangesets = self.matching_rangeset( + boundary.centers[0] as i32, + ScanNumberType::Global(boundary.centers[1] as usize), + &tol, + ); + + let mut out = Vec::new(); + if let Some(rangesets) = rangesets { + for range in rangesets.ranges.iter() { + for i in range.0..range.1 { + out.push(&i); + } + } } + out + } +} - let peaks = expanded_scan_indices - .iter() - .zip(frame.tof_indices.iter()) - .zip(frame.intensities.iter()) - .map(|((scan_index, tof_index), intensity)| TimsPeak { - intensity: *intensity, - mz: mz_converter.convert(*tof_index), - mobility: *scan_index, - npeaks: 1, - }) - .collect::>(); +pub trait FramePointTolerance { + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32); + fn scan_range( + &self, + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType); +} - if cfg!(debug_assertions) { - for peak in peaks.iter() { - _check_peak_sanity(peak); - } - } +struct AbsoluteFramePointTolerance { + tof_index_tolerance: u32, + scan_tolerance: usize, +} - let index = frame.index; - let rt = frame.rt; - let frame_type = frame.frame_type; +impl FramePointTolerance for AbsoluteFramePointTolerance { + fn tof_index_range( + &self, + tof_index: u32, + ) -> (u32, u32) { + let tof_index_tolerance = self.tof_index_tolerance; + ( + tof_index.saturating_sub(tof_index_tolerance), + 
tof_index.saturating_add(tof_index_tolerance), + ) + } - DenseFrame { - raw_peaks: peaks, - index, - rt, - frame_type, - sorted: None, + fn scan_range( + &self, + scan_index: ScanNumberType, + ) -> (ScanNumberType, ScanNumberType) { + match scan_index { + ScanNumberType::Global(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Global(x.saturating_sub(scan_tolerance)), + ScanNumberType::Global(x + scan_tolerance), + ) + }, + ScanNumberType::Local(x) => { + let scan_tolerance = self.scan_tolerance; + ( + ScanNumberType::Local(x.saturating_sub(scan_tolerance)), + ScanNumberType::Local(x + scan_tolerance), + ) + }, } } +} - pub fn from_frame_window( - frame_window: &FrameSlice, - ims_converter: &Scan2ImConverter, - mz_converter: &Tof2MzConverter, - ) -> DenseFrame { - let mut expanded_scan_indices = Vec::with_capacity(frame_window.tof_indices.len()); - let mut last_scan_offset = frame_window.scan_offsets[0]; - for (scan_index, index_offset) in frame_window.scan_offsets[1..].iter().enumerate() { - let num_tofs = index_offset - last_scan_offset; - let scan_index_use = (scan_index + frame_window.scan_start) as u32; - - let ims = ims_converter.convert(scan_index as f64) as f32; - if ims < 0.0 { - info!("Negative IMS value: {}", ims); - info!("scan_index_use: {}", scan_index_use); - info!("scan_index: {}", scan_index); - info!("frame_window.scan_start: {}", frame_window.scan_start); - } - debug_assert!(ims >= 0.0); - expanded_scan_indices.extend(vec![ims; num_tofs]); - last_scan_offset = *index_offset; - } - debug_assert!(last_scan_offset == frame_window.tof_indices.len()); - - let peaks = expanded_scan_indices - .iter() - .zip(frame_window.tof_indices.iter()) - .zip(frame_window.intensities.iter()) - .map(|((scan_index, tof_index), intensity)| TimsPeak { - intensity: *intensity, - mz: mz_converter.convert(*tof_index), - mobility: *scan_index, - npeaks: 1, - }) - .collect::>(); +type Range = (usize, usize); - if cfg!(debug_assertions) { - for peak in 
peaks.iter() { - _check_peak_sanity(peak); - } - } +pub struct RangeSet { + ranges: Vec, + offset: usize, +} - let index = frame_window.parent_frame_index; - let rt = frame_window.rt; - let frame_type = frame_window.frame_type; +impl RangeSet { + fn extend( + &mut self, + other: RangeSet, + ) { + let new_offset = self.offset.min(other.offset); + let vs_self_offset = self.offset - new_offset; + let vs_other_offset = other.offset - new_offset; - DenseFrame { - raw_peaks: peaks, - index, - rt, - frame_type, - sorted: None, + for item in self.ranges.iter_mut() { + item.0 += vs_self_offset; + item.1 += vs_self_offset; } - } - pub fn sort_by_mz(&mut self) { - match self.sorted { - Some(SortingOrder::Mz) => (), - _ => { - self.raw_peaks - .sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); - self.sorted = Some(SortingOrder::Mz); - }, + for item in other.ranges.iter() { + self.ranges + .push((item.0 + vs_other_offset, item.1 + vs_other_offset)); } + + self.ranges.sort_unstable_by(|a, b| a.0.cmp(&b.0)); } - pub fn sort_by_mobility(&mut self) { - match self.sorted { - Some(SortingOrder::Mobility) => (), - _ => { - self.raw_peaks - .sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); - self.sorted = Some(SortingOrder::Mobility); - }, + fn any_overlap(&self) -> bool { + let mut last_end = 0; + + for range in self.ranges.iter() { + if range.0 < last_end { + return true; + } + last_end = range.1; } + false } } -pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); +#[derive(Debug, Clone)] +pub enum MsMsFrameSliceWindowInfo { + WindowGroup(usize), + SingleWindow(FrameMsMsWindowInfo), +} diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs new file mode 100644 index 0000000..5e011bc --- /dev/null +++ b/src/ms/frames/frames.rs @@ -0,0 +1,76 @@ +pub use timsrust::Frame; +pub use timsrust::FrameType; +pub use timsrust::{ + ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, +}; + +use 
crate::space::space_generics::HasIntensity; + +#[derive(Debug, Clone, Copy)] +pub struct TimsPeak { + pub intensity: u32, + pub mz: f64, + pub mobility: f32, + pub npeaks: u32, +} + +impl HasIntensity for TimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct RawTimsPeak { + pub intensity: u32, + pub tof_index: u32, + pub scan_index: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct RawTimsPeakReference<'a> { + pub intensity: &'a u32, + pub tof_index: &'a u32, + pub scan_index: &'a usize, +} + +impl HasIntensity for RawTimsPeak { + fn intensity(&self) -> u64 { + self.intensity as u64 + } +} + +impl<'a> HasIntensity for RawTimsPeakReference<'a> { + fn intensity(&self) -> u64 { + *self.intensity as u64 + } +} + +#[derive(Debug, Clone, Copy)] +pub enum SortingOrder { + Mz, + Mobility, + Intensity, +} + +#[derive(Debug, Clone)] +pub struct DenseFrame { + pub raw_peaks: Vec, + pub index: usize, + pub rt: f64, + pub frame_type: FrameType, + pub sorted: Option, +} + +/// Information on the context of a window in a frame. +/// +/// This adds to a frame slice the context of the what isolation was used +/// to generate the frame slice. 
+#[derive(Debug, Clone)] +pub struct FrameMsMsWindowInfo { + pub mz_start: f32, + pub mz_end: f32, + pub window_group_id: usize, + pub within_window_quad_group_id: usize, + pub global_quad_row_id: usize, +} diff --git a/src/ms/frames/mod.rs b/src/ms/frames/mod.rs new file mode 100644 index 0000000..11e84b4 --- /dev/null +++ b/src/ms/frames/mod.rs @@ -0,0 +1,6 @@ +pub mod dense_frame_window; +pub mod frame_slice; +pub mod frames; +pub use dense_frame_window::{Converters, DenseFrameWindow}; +pub use frame_slice::{FrameSlice, MsMsFrameSliceWindowInfo}; +pub use frames::{DenseFrame, FrameMsMsWindowInfo, TimsPeak}; diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 3d5ba83..4cb2d1b 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -204,14 +204,14 @@ pub trait NDPointConverter { &self, elem: &T, ) -> NDPoint; - fn convert_vec( + fn convert_iter( &self, - elems: &[T], - ) -> (Vec>, NDBoundary) { - let points = elems - .iter() - .map(|elem| self.convert(elem)) - .collect::>(); + elems: IT, + ) -> (Vec>, NDBoundary) + where + IT: ExactSizeIterator, + { + let points = elems.map(|elem| self.convert(&elem)).collect::>(); let boundary = NDBoundary::from_ndpoints(&points); (points, boundary) } From 1cd060023e59f7e273e1ba4d1555722db62b7d9a Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Fri, 19 Jul 2024 15:26:08 -0700 Subject: [PATCH 18/26] (wip) progress towards full implementation of sliding window denoising --- src/aggregation/aggregators.rs | 17 ++++---- src/aggregation/dbscan/dbscan.rs | 45 ++++++++------------- src/aggregation/dbscan/denseframe_dbscan.rs | 18 ++++++++- src/aggregation/dbscan/runner.rs | 24 +++++------ src/aggregation/ms_denoise.rs | 39 +++++++++++------- src/aggregation/tracing.rs | 36 ++++++++++++++++- src/ms/frames/frame_slice.rs | 14 ++++--- src/space/kdtree.rs | 13 +++--- src/space/quad.rs | 10 ++--- src/space/space_generics.rs | 45 +++++++++++++++++---- 10 files changed, 172 insertions(+), 89 deletions(-) diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index e6ee845..ce23c0b 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,5 +1,5 @@ use crate::ms::frames::TimsPeak; -use crate::space::space_generics::{HasIntensity, IntenseAtIndex}; +use crate::space::space_generics::{AsAggregableAtIndex, HasIntensity, IntenseAtIndex}; use crate::utils; use std::ops::Index; @@ -81,7 +81,7 @@ impl ClusterAggregator for TimsPeakAggregator { pub fn aggregate_clusters< T: Send + Clone + Copy, - RE: Index + Sync + Send + ?Sized, + RE: AsAggregableAtIndex + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, @@ -125,7 +125,7 @@ pub fn aggregate_clusters< fn parallel_aggregate_clusters< T: Send + Clone + Copy, - RE: Index + Sync + Send + ?Sized, + RE: AsAggregableAtIndex + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, @@ -144,7 +144,8 @@ fn parallel_aggregate_clusters< .filter_map(|(point_index, x)| match x { ClusterLabel::Cluster(cluster_id) => { let cluster_idx = *cluster_id as usize - 1; - let tmp: Option<(usize, T)> = Some((cluster_idx, elements[point_index])); + let tmp: Option<(usize, T)> = + Some((cluster_idx, elements.get_aggregable_at_index(point_index))); 
tmp }, _ => None, @@ -201,7 +202,7 @@ fn parallel_aggregate_clusters< .iter() .map(|i| { let mut oe = def_aggregator(); - oe.add(&elements[*i]); + oe.add(&elements.get_aggregable_at_index(*i)); oe }) .collect::>(); @@ -214,7 +215,7 @@ fn parallel_aggregate_clusters< fn serial_aggregate_clusters< T: Send + Clone + Copy, - RE: Index + Sync + Send + ?Sized, + RE: AsAggregableAtIndex + Sync + Send + ?Sized, G: Sync + Send + ClusterAggregator, R: Send, F: Fn() -> G + Send + Sync, @@ -234,12 +235,12 @@ fn serial_aggregate_clusters< match cluster_label { ClusterLabel::Cluster(cluster_id) => { let cluster_idx = *cluster_id as usize - 1; - cluster_vecs[cluster_idx].add(&(elements[point_index])); + cluster_vecs[cluster_idx].add(&(elements.get_aggregable_at_index(point_index))); }, ClusterLabel::Noise => { if keep_unclustered { let mut oe = def_aggregator(); - oe.add(&elements[point_index]); + oe.add(&elements.get_aggregable_at_index(point_index)); unclustered_points.push(oe); } }, diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index 8ab3eb9..72d5297 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,8 +1,8 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; use crate::space::space_generics::{ - convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, - NDPoint, NDPointConverter, QueriableIndexedPoints, + convert_to_bounds_query, AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, + IntenseAtIndex, NDPoint, NDPointConverter, QueriableIndexedPoints, }; use crate::utils::{self, ContextTimer}; use log::{debug, info, trace}; @@ -20,10 +20,10 @@ fn reassign_centroid< const N: usize, T: Send + Clone + Copy, C: NDPointConverter, - I: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync, + I: QueriableIndexedPoints<'a, N> + std::marker::Sync, G: Sync + Send + 
ClusterAggregator, R: Send, - RE: Send + Sync + Index + ?Sized, + RE: Send + Sync + AsAggregableAtIndex + ?Sized, F: Fn() -> G + Send + Sync, >( centroids: Vec, @@ -49,7 +49,7 @@ fn reassign_centroid< let mut aggregator = def_aggregator(); let mut num_agg = 0; for neighbor in neighbors { - aggregator.add(&elements[*neighbor]); + aggregator.add(&elements.get_aggregable_at_index(neighbor)); num_agg += 1; } trace!("Aggregated {} elements", num_agg); @@ -87,7 +87,7 @@ pub fn dbscan_generic< + IntoIterator + Send + Sync - + Index + + AsAggregableAtIndex + ?Sized, F: Fn() -> G + Send + Sync, D: Send + Sync, @@ -115,7 +115,7 @@ where let timer = utils::ContextTimer::new("dbscan_generic", true, log_level); let mut i_timer = timer.start_sub_timer("conversion"); - let (ndpoints, boundary) = converter.convert_iter(prefiltered_peaks.into_iter()); + let (ndpoints, boundary) = converter.convert_aggregables(prefiltered_peaks); i_timer.stop(true); let mut i_timer = timer.start_sub_timer("tree"); @@ -134,7 +134,7 @@ where timer, min_n, min_intensity, - def_aggregator, + &def_aggregator, extra_filter_fun, log_level, keep_unclustered, @@ -159,14 +159,8 @@ where pub fn dbscan_aggregate< 'a, const N: usize, - RE: IntenseAtIndex - + DistantAtIndex - + IntoIterator - + Send - + Sync - + Index - + ?Sized, - IND: QueriableIndexedPoints<'a, N, usize> + std::marker::Sync + Send, + RE: IntenseAtIndex + DistantAtIndex + AsAggregableAtIndex + Send + Sync + ?Sized, + IND: QueriableIndexedPoints<'a, N> + std::marker::Sync + Send, NAI: AsNDPointsAtIndex + std::marker::Sync + Send, T: HasIntensity + Send + Clone + Copy + Sync, D: Send + Sync, @@ -174,27 +168,22 @@ pub fn dbscan_aggregate< R: Send, F: Fn() -> G + Send + Sync, >( - prefiltered_peaks: &RE, - ndpoints: &NAI, - index: &IND, + prefiltered_peaks: &'a RE, + ndpoints: &'a NAI, + index: &'a IND, timer: ContextTimer, min_n: usize, min_intensity: u64, def_aggregator: F, - extra_filter_fun: Option<&(dyn Fn(&D) -> bool + Send + Sync)>, + 
extra_filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, log_level: utils::LogLevel, keep_unclustered: bool, - max_extension_distances: &[f32; N], + max_extension_distances: &'a [f32; N], show_progress: bool, ) -> Vec { let mut i_timer = timer.start_sub_timer("pre-sort"); - let mut intensity_sorted_indices = prefiltered_peaks - .into_iter() - .enumerate() - .map(|(i, peak)| (i, peak.intensity())) - .collect::>(); + let intensity_sorted_indices = prefiltered_peaks.intensity_sorted_indices(); - intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); i_timer.stop(true); let mut i_timer = timer.start_sub_timer("dbscan"); @@ -204,7 +193,7 @@ pub fn dbscan_aggregate< ndpoints, min_n, min_intensity, - &intensity_sorted_indices, + intensity_sorted_indices, extra_filter_fun, show_progress, max_extension_distances, diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 3d5f83f..39ecfc6 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -2,7 +2,7 @@ use crate::aggregation::aggregators::TimsPeakAggregator; use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::{DenseFrame, TimsPeak}; -use crate::space::space_generics::{DistantAtIndex, IntenseAtIndex}; +use crate::space::space_generics::{AsAggregableAtIndex, DistantAtIndex, IntenseAtIndex}; use crate::utils::within_distance_apply; // bool> @@ -74,6 +74,22 @@ impl IntenseAtIndex for Vec { ) -> u64 { self[index].intensity as u64 } + fn intensity_index_length(&self) -> usize { + self.len() + } +} + +impl AsAggregableAtIndex for Vec { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> TimsPeak { + self[index] + } + + fn num_aggregable(&self) -> usize { + self.len() + } } impl DistantAtIndex for Vec { diff --git a/src/aggregation/dbscan/runner.rs 
b/src/aggregation/dbscan/runner.rs index b266842..29740a5 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,8 +1,8 @@ -use crate::space::space_generics::NDPointConverter; use crate::space::space_generics::{ convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPoint, QueriableIndexedPoints, }; +use crate::space::space_generics::{AsAggregableAtIndex, NDPointConverter}; use std::marker::PhantomData; use std::ops::Index; @@ -224,8 +224,8 @@ where DAI: DistantAtIndex + ?Sized, { raw_elements: &'a PP, // &'a Vec, - intensity_sorted_indices: &'a Vec<(usize, u64)>, - indexed_points: &'a (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), + intensity_sorted_indices: Vec<(usize, u64)>, + indexed_points: &'a (dyn QueriableIndexedPoints<'a, N> + std::marker::Sync), projected_elements: &'a PE, // [NDPoint], raw_dist: &'a DAI, _phantom_metric: PhantomData, @@ -267,8 +267,8 @@ where fn run( &self, raw_elements: &'b PP, // Vec, // trait impl Index - intensity_sorted_indices: &'b Vec<(usize, u64)>, - indexed_points: &'b (dyn QueriableIndexedPoints<'a, N, usize> + std::marker::Sync), + intensity_sorted_indices: Vec<(usize, u64)>, + indexed_points: &'b (dyn QueriableIndexedPoints<'a, N> + std::marker::Sync), projected_elements: &'b PE, //[NDPoint], // trait impl AsNDPointAtIndex> raw_distance_calculator: &'b DAI, ) -> ClusterLabels @@ -292,7 +292,7 @@ where let points: DBSCANPoints = DBSCANPoints { raw_elements, - intensity_sorted_indices, + intensity_sorted_indices: intensity_sorted_indices, indexed_points, projected_elements, raw_dist: raw_distance_calculator, @@ -411,7 +411,7 @@ where .indexed_points .query_ndrange(&query_elems.0, query_elems.1) .iter() - .map(|x| **x) + .map(|x| *x) .collect::>(); timers.outer_loop_nn_timer.stop(false); @@ -537,7 +537,7 @@ where .indexed_points .query_ndrange(&inner_query_elems.0, inner_query_elems.1) .iter_mut() - .map(|x| **x) + .map(|x| *x) 
.collect::>(); timers.inner_loop_nn_timer.stop(false); local_neighbors @@ -717,18 +717,18 @@ where pub fn dbscan_label_clusters< 'a, const N: usize, - RE: IntenseAtIndex + DistantAtIndex + Send + Sync + Index + ?Sized, - T: QueriableIndexedPoints<'a, N, usize> + Send + std::marker::Sync, + RE: IntenseAtIndex + DistantAtIndex + Send + Sync + AsAggregableAtIndex + ?Sized, + T: QueriableIndexedPoints<'a, N> + Send + std::marker::Sync, PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, - E: Send + Sync, + E: Send + Sync + Copy, >( indexed_points: &'a T, raw_elements: &'a RE, projected_elements: &'a PE, // [NDPoint], min_n: usize, min_intensity: u64, - intensity_sorted_indices: &'a Vec<(usize, u64)>, + intensity_sorted_indices: Vec<(usize, u64)>, filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, progress: bool, max_extension_distances: &'a [f32; N], diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 8d6cc5f..6466a6f 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -11,6 +11,7 @@ use crate::ms::frames::FrameSlice; use crate::ms::frames::TimsPeak; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; +use crate::space::space_generics::AsAggregableAtIndex; use crate::space::space_generics::AsNDPointsAtIndex; use crate::space::space_generics::DistantAtIndex; use crate::space::space_generics::IntenseAtIndex; @@ -197,15 +198,13 @@ impl FrameSliceWindow<'_> { } } -impl Index for FrameSliceWindow<'_> { - type Output = MaybeIntenseRawPeak; - - fn index( +impl AsAggregableAtIndex for FrameSliceWindow<'_> { + fn get_aggregable_at_index( &self, index: usize, - ) -> &Self::Output { + ) -> MaybeIntenseRawPeak { let (pos, within_window_index) = self.get_window_index(index); - let tmp = self.window[pos]; + let tmp = &self.window[pos]; let (tof, int) = tmp.tof_int_at_index(within_window_index); let foo = MaybeIntenseRawPeak { intensity: int, @@ -213,7 +212,11 @@ impl Index for FrameSliceWindow<'_> { 
scan_index: tmp.global_scan_at_index(within_window_index), weight_only: pos != self.reference_index, }; - &foo + foo + } + + fn num_aggregable(&self) -> usize { + self.cum_lengths.last().unwrap().clone() } } @@ -237,18 +240,23 @@ impl IntenseAtIndex for FrameSliceWindow<'_> { let (pos, within_window_index) = self.get_window_index(index); self.window[pos].weight_at_index(within_window_index) } + + fn intensity_index_length(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } } -impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSliceWindow<'a> { +impl<'a> QueriableIndexedPoints<'a, 2> for FrameSliceWindow<'a> { fn query_ndpoint( &'a self, point: &NDPoint<2>, - ) -> Vec<&'a usize> { + ) -> Vec { let mut out = Vec::new(); - for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() + { let local_outs = frame.query_ndpoint(point); for ii in local_outs { - out.push(&(ii + cum_length)); + out.push(ii + cum_length); } } out @@ -258,12 +266,13 @@ impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSliceWindow<'a> { &'a self, boundary: &crate::space::space_generics::NDBoundary<2>, reference_point: Option<&NDPoint<2>>, - ) -> Vec<&'a usize> { + ) -> Vec { let mut out = Vec::new(); - for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths).enumerate() { + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() + { let local_outs = frame.query_ndrange(boundary, reference_point); for ii in local_outs { - out.push(&(ii + cum_length)); + out.push(ii + cum_length); } } out @@ -425,7 +434,7 @@ fn denoise_frame_slice_window( &fsw, min_n, min_intensity, - &intensity_sorted_indices, + intensity_sorted_indices, None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, false, &[10., 100.], diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 5d505e5..7645ba0 100644 --- 
a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -5,7 +5,7 @@ use crate::aggregation::chromatograms::{ use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::ms::frames::DenseFrameWindow; use crate::space::space_generics::{ - DistantAtIndex, HasIntensity, NDPoint, NDPointConverter, TraceLike, + AsAggregableAtIndex, DistantAtIndex, HasIntensity, NDPoint, NDPointConverter, TraceLike, }; use crate::space::space_generics::{IntenseAtIndex, NDBoundary}; use crate::utils; @@ -438,6 +438,23 @@ impl IntenseAtIndex for Vec { ) -> u64 { self[index].intensity } + + fn intensity_index_length(&self) -> usize { + self.len() + } +} + +impl AsAggregableAtIndex for Vec { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> TimeTimsPeak { + self[index] + } + + fn num_aggregable(&self) -> usize { + self.len() + } } impl DistantAtIndex for Vec { @@ -703,6 +720,23 @@ impl IntenseAtIndex for Vec { ) -> u64 { self[index].intensity } + + fn intensity_index_length(&self) -> usize { + self.len() + } +} + +impl AsAggregableAtIndex for Vec { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> BaseTrace { + self[index] + } + + fn num_aggregable(&self) -> usize { + self.len() + } } struct BaseTraceDistance { diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 6f8c449..62c38e7 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -572,6 +572,10 @@ impl<'a> IntenseAtIndex for FrameSlice<'a> { self.intensities[index] as u64 } + fn intensity_index_length(&self) -> usize { + self.intensities.len() + } + // fn get_intense_at_index( // &self, // index: usize, @@ -608,11 +612,11 @@ impl<'a> AsNDPointsAtIndex<2> for FrameSlice<'a> { } } -impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { +impl<'a> QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { fn query_ndpoint( &'a self, point: &NDPoint<2>, - ) -> Vec<&'a usize> { + ) -> Vec { let tof_index = point.values[0] as i32; let scan_index = 
point.values[1] as usize; let rangesets = self.matching_rangeset( @@ -628,7 +632,7 @@ impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { if let Some(rangesets) = rangesets { for range in rangesets.ranges.iter() { for i in range.0..range.1 { - out.push(&i); + out.push(i); } } } @@ -639,7 +643,7 @@ impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { &'a self, boundary: &NDBoundary<2>, reference_point: Option<&NDPoint<2>>, - ) -> Vec<&'a usize> { + ) -> Vec { let tol = AbsoluteFramePointTolerance { tof_index_tolerance: (boundary.widths[0] / 2.) as u32, scan_tolerance: (boundary.widths[1] / 2.) as usize, @@ -654,7 +658,7 @@ impl<'a> QueriableIndexedPoints<'a, 2, usize> for FrameSlice<'a> { if let Some(rangesets) = rangesets { for range in rangesets.ranges.iter() { for i in range.0..range.1 { - out.push(&i); + out.push(i); } } } diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index 9917b36..5df1beb 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -260,24 +260,25 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { } } -impl<'a, T, const D: usize> QueriableIndexedPoints<'a, D, T> for RadiusKDTree<'a, T, D> { +impl<'a, const D: usize> QueriableIndexedPoints<'a, D> for RadiusKDTree<'a, usize, D> { fn query_ndpoint( &'a self, point: &NDPoint, - ) -> Vec<&'a T> { - self.query(point) + ) -> Vec { + self.query(point).into_iter().map(|x| *x).collect() } fn query_ndrange( &'a self, boundary: &NDBoundary, reference_point: Option<&NDPoint>, - ) -> Vec<&'a T> { + ) -> Vec { let candidates = self.query_range(boundary); if let Some(point) = reference_point { - self.refine_query(point, candidates) + let tmp = self.refine_query(point, candidates); + tmp.into_iter().map(|x| *x).collect() } else { - candidates.iter().map(|x| x.1).collect() + candidates.iter().map(|x| *x.1).collect() } } } diff --git a/src/space/quad.rs b/src/space/quad.rs index e07c4b0..dbb0710 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -255,14 +255,14 
@@ impl<'a, T> RadiusQuadTree<'a, T> { // TODO: rename count_neigh_monotonocally_increasing // because it can do more than just count neighbors.... -impl<'a, T> QueriableIndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { +impl<'a> QueriableIndexedPoints<'a, 2> for RadiusQuadTree<'a, usize> { fn query_ndpoint( &'a self, point: &NDPoint<2>, - ) -> Vec<&'a T> { + ) -> Vec { self.query(point) .into_iter() - .map(|x| x.1) + .map(|x| *x.1) .collect::>() } @@ -270,7 +270,7 @@ impl<'a, T> QueriableIndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { &'a self, boundary: &NDBoundary<2>, reference_point: Option<&NDPoint<2>>, - ) -> Vec<&'a T> { + ) -> Vec { let mut result = Vec::new(); self.query_range(boundary, &mut result); @@ -279,7 +279,7 @@ impl<'a, T> QueriableIndexedPoints<'a, 2, T> for RadiusQuadTree<'a, T> { None => result, } .into_iter() - .map(|x| x.1) + .map(|x| *x.1) .collect::>() } } diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 4cb2d1b..f02d1d5 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -1,3 +1,5 @@ +use rayon::prelude::ParallelSliceMut; + #[derive(Debug, Clone, Copy)] pub struct NDBoundary { pub starts: [f32; DIMENSIONALITY], @@ -102,17 +104,16 @@ pub struct NDPoint { pub values: [f32; DIMENSIONALITY], } -// Q: is there any instance where T is not usize? 
-pub trait QueriableIndexedPoints<'a, const N: usize, T> { +pub trait QueriableIndexedPoints<'a, const N: usize> { fn query_ndpoint( &'a self, point: &NDPoint, - ) -> Vec<&'a T>; + ) -> Vec; fn query_ndrange( &'a self, boundary: &NDBoundary, reference_point: Option<&NDPoint>, - ) -> Vec<&'a T>; + ) -> Vec; } pub trait AsNDPointsAtIndex { @@ -154,6 +155,28 @@ pub trait IntenseAtIndex { ) -> u64 { self.intensity_at_index(index) } + fn intensity_index_length(&self) -> usize; + fn intensity_sorted_indices(&self) -> Vec<(usize, u64)> { + let mut indices: Vec<(usize, u64)> = (0..self.intensity_index_length()) + .map(|i| (i, self.intensity_at_index(i))) + .collect(); + indices.par_sort_unstable_by_key(|&x| x.1); + indices + } +} + +pub trait AsAggregableAtIndex +where + // T: HasIntensity + Copy, + // I am not sure how I want to express this in the type system. + T: Copy, +{ + fn get_aggregable_at_index( + &self, + index: usize, + ) -> T; + + fn num_aggregable(&self) -> usize; } impl IntenseAtIndex for [T] @@ -166,6 +189,9 @@ where ) -> u64 { self[index].intensity() } + fn intensity_index_length(&self) -> usize { + self.len() + } } pub trait DistantAtIndex { @@ -204,14 +230,17 @@ pub trait NDPointConverter { &self, elem: &T, ) -> NDPoint; - fn convert_iter( + fn convert_aggregables( &self, - elems: IT, + elems: &IT, ) -> (Vec>, NDBoundary) where - IT: ExactSizeIterator, + IT: AsAggregableAtIndex + ?Sized, + T: Copy, { - let points = elems.map(|elem| self.convert(&elem)).collect::>(); + let points = (0..elems.num_aggregable()) + .map(|i| self.convert(&elems.get_aggregable_at_index(i))) + .collect::>(); let boundary = NDBoundary::from_ndpoints(&points); (points, boundary) } From c9b89934a55605d9912f94dfd65c0d4b962456a6 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 21 Jul 2024 00:29:19 -0700 Subject: [PATCH 19/26] added debug serialization and changed indexing strategy --- .gitignore | 2 +- Cargo.lock | 14 +- Cargo.toml | 8 +- README.md | 13 + src/aggregation/dbscan/dbscan.rs | 11 +- src/aggregation/dbscan/runner.rs | 99 ++++-- src/aggregation/ms_denoise.rs | 421 ++++++++----------------- src/aggregation/tracing.rs | 63 ++-- src/ms/frames/dense_frame_window.rs | 9 +- src/ms/frames/frame_slice.rs | 188 ++++++++++- src/ms/frames/frame_slice_rt_window.rs | 278 ++++++++++++++++ src/ms/frames/frames.rs | 12 +- src/ms/frames/mod.rs | 3 +- src/ms/tdf.rs | 1 + src/space/space_generics.rs | 11 + src/utils.rs | 97 +++++- 16 files changed, 828 insertions(+), 402 deletions(-) create mode 100644 src/ms/frames/frame_slice_rt_window.rs diff --git a/.gitignore b/.gitignore index cadeea2..ab77253 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ /tmp /run_1k /benchmark +/debug_jsons traces_debug.csv pseudoscans_debug.json - diff --git a/Cargo.lock b/Cargo.lock index 8f72f87..2defd14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1262,15 +1262,6 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" -[[package]] -name = "memmap2" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" -dependencies = [ - "libc", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2326,14 +2317,13 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.2.4" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9301709a549fabb2d79f564c528b0af5ca0002bdf0055341cfcc07950a44290b" +checksum = "7a0791ad8b3836b6a582b1bfb563c04d0e09acdaf85e45c16d3158a0bcb164b6" dependencies = [ "bytemuck", "byteorder", "linreg", - "memmap2", "parquet", "rayon", "rusqlite", diff 
--git a/Cargo.toml b/Cargo.toml index 58f8f65..5717454 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -timsrust = "0.2.2" +timsrust = "= 0.2.2" # Serialization serde = { version = "1.0.193", features = ["derive"] } @@ -32,6 +32,10 @@ toml = "0.8.8" [features] par_dataprep = [] +less_parallel = [] # Mostly for profiling reasons ... looking at flamegraphs is hard with rayon... -# [profile.release] +[profile.release] +lto = "thin" +panic = "abort" +# codegen-units = 1 # debug = true diff --git a/README.md b/README.md index 5660599..302d112 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,19 @@ There are a couple of features for development. RUST_LOG=info # will change the log level ... levels are standard (info, debug, warn, error, trace) RUST_BACKTRACE=1 # will show a backtrace on panic RAYON_NUM_THREADS=4 # will set the number of threads to use in rayon + +IONMESH_DEBUG_JSON_PATH + # Will save some intermediate results + # as json into that dir +IONMESH_DEBUG_JSON_FREQUENCY + # Will control the frequency with which results are saved. + # 1 means every iteration, 2 every other, etc. + # They are done pseudo-randomly. +IONMESH_PROFILE_NUM_WINDOWS + # Will control the number of frame groups that will be used. + # If this is a diapasef run, it means it will run only X + # quad isolation windows. For diagonal, it will run X diagonal + # windows.
``` ## Roadmap diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index 72d5297..7d78f4c 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -88,6 +88,7 @@ pub fn dbscan_generic< + Send + Sync + AsAggregableAtIndex + + std::fmt::Debug + ?Sized, F: Fn() -> G + Send + Sync, D: Send + Sync, @@ -159,8 +160,14 @@ where pub fn dbscan_aggregate< 'a, const N: usize, - RE: IntenseAtIndex + DistantAtIndex + AsAggregableAtIndex + Send + Sync + ?Sized, - IND: QueriableIndexedPoints<'a, N> + std::marker::Sync + Send, + RE: IntenseAtIndex + + DistantAtIndex + + AsAggregableAtIndex + + Send + + Sync + + std::fmt::Debug + + ?Sized, + IND: QueriableIndexedPoints<'a, N> + std::marker::Sync + Send + std::fmt::Debug, NAI: AsNDPointsAtIndex + std::marker::Sync + Send, T: HasIntensity + Send + Clone + Copy + Sync, D: Send + Sync, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 29740a5..007ec8f 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -4,10 +4,10 @@ use crate::space::space_generics::{ }; use crate::space::space_generics::{AsAggregableAtIndex, NDPointConverter}; use std::marker::PhantomData; -use std::ops::Index; use crate::utils; use indicatif::ProgressIterator; +use log::trace; use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; @@ -217,25 +217,27 @@ struct DBSCANRunner<'a, const N: usize, D> { max_extension_distances: &'a [f32; N], } -struct DBSCANPoints<'a, const N: usize, PP, PE, DAI, E> +struct DBSCANPoints<'a, const N: usize, PP, PE, DAI, E, QIP> where PP: IntenseAtIndex + std::marker::Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { raw_elements: &'a PP, // &'a Vec, intensity_sorted_indices: Vec<(usize, u64)>, - indexed_points: &'a (dyn QueriableIndexedPoints<'a, N> + std::marker::Sync), + 
indexed_points: &'a QIP, projected_elements: &'a PE, // [NDPoint], raw_dist: &'a DAI, _phantom_metric: PhantomData, } -impl<'a, const N: usize, PP, QQ, D, E> DBSCANPoints<'a, N, PP, QQ, D, E> +impl<'a, const N: usize, PP, QQ, D, E, QIP> DBSCANPoints<'a, N, PP, QQ, D, E, QIP> where PP: IntenseAtIndex + std::marker::Send + ?Sized, QQ: AsNDPointsAtIndex + ?Sized, D: DistantAtIndex + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { fn get_intensity_at_index( &self, @@ -264,11 +266,11 @@ impl<'a, 'b: 'a, const N: usize, D> DBSCANRunner<'a, N, D> where D: Sync, { - fn run( + fn run( &self, raw_elements: &'b PP, // Vec, // trait impl Index intensity_sorted_indices: Vec<(usize, u64)>, - indexed_points: &'b (dyn QueriableIndexedPoints<'a, N> + std::marker::Sync), + indexed_points: &'b QIP, projected_elements: &'b PE, //[NDPoint], // trait impl AsNDPointAtIndex> raw_distance_calculator: &'b DAI, ) -> ClusterLabels @@ -276,6 +278,7 @@ where PP: IntenseAtIndex + Send + Sync + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync + std::fmt::Debug, { let usize_filterfun = match self.filter_fun { Some(filterfun) => { @@ -290,7 +293,11 @@ where let mut state = DBSCANRunnerState::new(intensity_sorted_indices.len(), usize_filterfun); - let points: DBSCANPoints = DBSCANPoints { + debug_assert!(intensity_sorted_indices.len() == raw_elements.intensity_index_length()); + debug_assert!(intensity_sorted_indices.len() == projected_elements.num_ndpoints()); + // trace!("Index: {:?}", indexed_points); + + let points: DBSCANPoints = DBSCANPoints { raw_elements, intensity_sorted_indices: intensity_sorted_indices, indexed_points, @@ -321,15 +328,16 @@ where state.cluster_labels } - fn process_points( + fn process_points( &self, mut state: DBSCANRunnerState, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, ) -> DBSCANRunnerState where PP: 
IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { let my_progbar = state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); @@ -352,10 +360,10 @@ where } /// This method gets applied to every point in decreasing intensity order. - fn process_single_point( + fn process_single_point( &self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, @@ -364,6 +372,7 @@ where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { return; @@ -376,6 +385,9 @@ where timers, cc_metrics, ); + + // trace!("Neighbors: {:?}", neighbors); + if !self.is_core_point(&neighbors, points.raw_elements, timers) { cluster_labels.set_noise(point_index); return; @@ -391,10 +403,10 @@ where ); } - fn find_main_loop_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, @@ -403,16 +415,32 @@ where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { timers.outer_loop_nn_timer.reset_start(); let binding = points.projected_elements.get_ndpoint(point_index); let query_elems = convert_to_bounds_query(&binding); let mut candidate_neighbors = points .indexed_points - .query_ndrange(&query_elems.0, query_elems.1) - .iter() - .map(|x| *x) - .collect::>(); + .query_ndrange(&query_elems.0, 
query_elems.1); + + // trace!("Query elems: {:?}", query_elems); + // trace!("Candidate neighbors: {:?}", candidate_neighbors); + if cfg!(debug_assertions) { + // Make sure all generated neighbors are within the bounds. + for i in candidate_neighbors.iter() { + assert!( + *i < points.projected_elements.num_ndpoints(), + "Index: {} out of proj elems bounds", + i + ); + assert!( + *i < points.raw_elements.intensity_index_length(), + "Index: {} out of intensity bounds", + i + ); + } + } timers.outer_loop_nn_timer.stop(false); if filter_fun_cache.is_none() { @@ -465,11 +493,11 @@ where return neighbor_intensity_total >= self.min_intensity; } - fn main_loop_expand_cluster( + fn main_loop_expand_cluster( &self, apex_point_index: usize, neighbors: Vec, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, @@ -477,6 +505,7 @@ where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { cluster_labels.set_new_cluster(apex_point_index); let mut seed_set: Vec = neighbors; @@ -519,16 +548,17 @@ where } } - fn find_local_neighbors( + fn find_local_neighbors( &self, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { timers.inner_loop_nn_timer.reset_start(); let binding = points.projected_elements.get_ndpoint(neighbor_index); @@ -543,12 +573,12 @@ where local_neighbors } - fn filter_neighbors_inner_loop( + fn filter_neighbors_inner_loop( &self, local_neighbors: Vec, cluster_apex_point_index: usize, current_center_point_index: usize, - points: 
&DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, cluster_labels: &ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, @@ -557,6 +587,7 @@ where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { let filtered = self.apply_filter_fun( local_neighbors, @@ -580,17 +611,18 @@ where ) } - fn filter_by_apex_distance( + fn filter_by_apex_distance( &self, mut neighbors: Vec, apex_point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { timers.local_neighbor_filter_timer.reset_start(); let query_point = &points.projected_elements.get_ndpoint(apex_point_index); @@ -601,17 +633,18 @@ where neighbors } - fn is_extension_core_point( + fn is_extension_core_point( &self, neighbors: &[usize], current_center_point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, timers: &mut DBScanTimers, ) -> bool where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors @@ -635,17 +668,18 @@ where /// one could pass a function that checks if the chromatograms a high correlation. /// Because two might share the same point in space, intensity is not really /// relevant but co-elution might be critical. 
- fn apply_filter_fun( + fn apply_filter_fun( &self, local_neighbors: Vec, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, filter_fun_cache: &mut Option, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { if let Some(cache) = filter_fun_cache { local_neighbors @@ -674,17 +708,18 @@ where neighbors } - fn filter_by_local_intensity_and_distance( + fn filter_by_local_intensity_and_distance( &self, mut neighbors: Vec, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D>, + points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, timers: &mut DBScanTimers, ) -> Vec where PP: IntenseAtIndex + Send + ?Sized, PE: AsNDPointsAtIndex + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, + QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, { timers.local_neighbor_filter_timer.reset_start(); let query_intensity = points.raw_elements.intensity_at_index(neighbor_index); @@ -718,7 +753,7 @@ pub fn dbscan_label_clusters< 'a, const N: usize, RE: IntenseAtIndex + DistantAtIndex + Send + Sync + AsAggregableAtIndex + ?Sized, - T: QueriableIndexedPoints<'a, N> + Send + std::marker::Sync, + T: QueriableIndexedPoints<'a, N> + Send + std::marker::Sync + std::fmt::Debug, PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, E: Send + Sync + Copy, diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 6466a6f..adfc542 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,34 +1,29 @@ use core::panic; -use std::ops::Index; +use serde::{Deserialize, Serialize}; -use crate::aggregation::dbscan::dbscan::dbscan_aggregate; use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; -use crate::ms::frames::frames::RawTimsPeak; +use crate::ms::frames::frame_slice_rt_window::FrameSliceWindow; +use 
crate::ms::frames::frame_slice_rt_window::RawWeightedTimsPeakAggregator; use crate::ms::frames::Converters; use crate::ms::frames::DenseFrame; use crate::ms::frames::DenseFrameWindow; +use crate::ms::frames::ExpandedFrameSlice; use crate::ms::frames::FrameSlice; +use crate::ms::frames::MsMsFrameSliceWindowInfo; use crate::ms::frames::TimsPeak; use crate::ms::tdf; use crate::ms::tdf::DIAFrameInfo; -use crate::space::space_generics::AsAggregableAtIndex; use crate::space::space_generics::AsNDPointsAtIndex; -use crate::space::space_generics::DistantAtIndex; use crate::space::space_generics::IntenseAtIndex; -use crate::space::space_generics::NDPoint; -use crate::space::space_generics::QueriableIndexedPoints; use crate::utils; -use timsrust::ConvertableIndex; +use crate::utils::maybe_save_json_if_debugging; use indicatif::ParallelProgressIterator; use log::{info, trace, warn}; use rayon::prelude::*; -use serde::{Deserialize, Serialize}; use timsrust::Frame; use super::aggregators::aggregate_clusters; -use super::aggregators::ClusterAggregator; -use super::aggregators::TimsPeakAggregator; use super::dbscan::runner::dbscan_label_clusters; // TODO I can probably split the ms1 and ms2 ... 
@@ -151,245 +146,8 @@ fn _denoise_denseframe( denoised_frame } -#[derive(Debug)] -struct FrameSliceWindow<'a> { - window: &'a [FrameSlice<'a>], - reference_index: usize, - cum_lengths: Vec, -} - -#[derive(Debug, Clone, Copy)] -struct MaybeIntenseRawPeak { - intensity: u32, - tof_index: u32, - scan_index: usize, - weight_only: bool, -} - -impl FrameSliceWindow<'_> { - fn new<'a>(window: &'a [FrameSlice<'a>]) -> FrameSliceWindow<'a> { - let cum_lengths = window - .iter() - .map(|x| x.num_ndpoints()) - .scan(0, |acc, x| { - *acc += x; - Some(*acc) - }) - .collect(); - FrameSliceWindow { - window, - reference_index: window.len() / 2, - cum_lengths, - } - } - fn get_window_index( - &self, - index: usize, - ) -> (usize, usize) { - let mut pos = 0; - for (i, cum_length) in self.cum_lengths.iter().enumerate() { - if index < *cum_length { - pos = i; - break; - } - } - let within_window_index = index - self.cum_lengths[pos]; - (pos, within_window_index) - } -} - -impl AsAggregableAtIndex for FrameSliceWindow<'_> { - fn get_aggregable_at_index( - &self, - index: usize, - ) -> MaybeIntenseRawPeak { - let (pos, within_window_index) = self.get_window_index(index); - let tmp = &self.window[pos]; - let (tof, int) = tmp.tof_int_at_index(within_window_index); - let foo = MaybeIntenseRawPeak { - intensity: int, - tof_index: tof, - scan_index: tmp.global_scan_at_index(within_window_index), - weight_only: pos != self.reference_index, - }; - foo - } - - fn num_aggregable(&self) -> usize { - self.cum_lengths.last().unwrap().clone() - } -} - -impl IntenseAtIndex for FrameSliceWindow<'_> { - fn intensity_at_index( - &self, - index: usize, - ) -> u64 { - let (pos, within_window_index) = self.get_window_index(index); - if pos == self.reference_index { - self.window[self.reference_index].intensity_at_index(within_window_index) - } else { - 0 - } - } - - fn weight_at_index( - &self, - index: usize, - ) -> u64 { - let (pos, within_window_index) = self.get_window_index(index); - 
self.window[pos].weight_at_index(within_window_index) - } - - fn intensity_index_length(&self) -> usize { - self.cum_lengths.last().unwrap().clone() - } -} - -impl<'a> QueriableIndexedPoints<'a, 2> for FrameSliceWindow<'a> { - fn query_ndpoint( - &'a self, - point: &NDPoint<2>, - ) -> Vec { - let mut out = Vec::new(); - for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() - { - let local_outs = frame.query_ndpoint(point); - for ii in local_outs { - out.push(ii + cum_length); - } - } - out - } - - fn query_ndrange( - &'a self, - boundary: &crate::space::space_generics::NDBoundary<2>, - reference_point: Option<&NDPoint<2>>, - ) -> Vec { - let mut out = Vec::new(); - for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() - { - let local_outs = frame.query_ndrange(boundary, reference_point); - for ii in local_outs { - out.push(ii + cum_length); - } - } - out - } -} - -impl DistantAtIndex for FrameSliceWindow<'_> { - fn distance_at_indices( - &self, - index: usize, - other: usize, - ) -> f32 { - let (pos, within_window_index) = self.get_window_index(index); - let (pos_other, within_window_index_other) = self.get_window_index(other); - panic!("unimplemented"); - 0. 
- } -} - -impl AsNDPointsAtIndex<2> for FrameSliceWindow<'_> { - fn get_ndpoint( - &self, - index: usize, - ) -> NDPoint<2> { - let (pos, within_window_index) = self.get_window_index(index); - self.window[pos].get_ndpoint(within_window_index) - } - - fn num_ndpoints(&self) -> usize { - self.cum_lengths.last().unwrap().clone() - } -} - -#[derive(Default, Debug, Clone, Copy)] -pub struct RawWeightedTimsPeakAggregator { - pub cumulative_weighted_cluster_tof: u64, - pub cumulative_weighted_cluster_scan: u64, - pub cumulative_cluster_weight: u64, - pub cumulative_cluster_intensity: u64, - pub num_peaks: u64, - pub num_intense_peaks: u64, -} - -#[derive(Debug, Clone, Copy)] -struct RawScaleTimsPeak { - intensity: f64, - tof_index: f64, - scan_index: f64, - npeaks: u64, -} - -impl RawScaleTimsPeak { - fn to_timspeak( - &self, - mz_converter: &timsrust::Tof2MzConverter, - ims_converter: &timsrust::Scan2ImConverter, - ) -> TimsPeak { - TimsPeak { - intensity: self.intensity as u32, - mz: mz_converter.convert(self.tof_index), - mobility: ims_converter.convert(self.scan_index) as f32, - npeaks: self.npeaks as u32, - } - } -} - -impl ClusterAggregator for RawWeightedTimsPeakAggregator { - // Calculate the weight-weighted average of the cluster - // for mz and ims. The intensity is kept as is. 
- fn add( - &mut self, - elem: &MaybeIntenseRawPeak, - ) { - self.cumulative_cluster_intensity += - if elem.weight_only { 0 } else { elem.intensity } as u64; - self.cumulative_cluster_weight += elem.intensity as u64; - self.cumulative_weighted_cluster_tof += elem.tof_index as u64 * elem.intensity as u64; - self.cumulative_weighted_cluster_scan += elem.scan_index as u64 * elem.intensity as u64; - self.num_peaks += 1; - if !elem.weight_only { - self.num_intense_peaks += 1; - }; - } - - fn aggregate(&self) -> RawScaleTimsPeak { - // Use raw - RawScaleTimsPeak { - intensity: self.cumulative_cluster_intensity as f64, - tof_index: self.cumulative_weighted_cluster_tof as f64 - / self.cumulative_cluster_weight as f64, - scan_index: self.cumulative_weighted_cluster_scan as f64 - / self.cumulative_cluster_weight as f64, - npeaks: self.num_intense_peaks, - } - } - - fn combine( - self, - other: Self, - ) -> Self { - Self { - cumulative_weighted_cluster_tof: self.cumulative_weighted_cluster_tof - + other.cumulative_weighted_cluster_tof, - cumulative_weighted_cluster_scan: self.cumulative_weighted_cluster_scan - + other.cumulative_weighted_cluster_scan, - cumulative_cluster_weight: self.cumulative_cluster_weight - + other.cumulative_cluster_weight, - cumulative_cluster_intensity: self.cumulative_cluster_intensity - + other.cumulative_cluster_intensity, - num_peaks: self.num_peaks + other.num_peaks, - num_intense_peaks: self.num_intense_peaks + other.num_intense_peaks, - } - } -} - fn denoise_frame_slice_window( - frameslice_window: &[FrameSlice], + frameslice_window: &[ExpandedFrameSlice], ims_converter: &timsrust::Scan2ImConverter, mz_converter: &timsrust::Tof2MzConverter, dia_frame_info: &DIAFrameInfo, @@ -402,6 +160,9 @@ fn denoise_frame_slice_window( ) -> DenseFrameWindow { let timer = utils::ContextTimer::new("dbscan_dfs", true, utils::LogLevel::TRACE); let fsw = FrameSliceWindow::new(frameslice_window); + let ref_frame_parent_index = 
fsw.window[fsw.reference_index].parent_frame_index; + let saved_first = + maybe_save_json_if_debugging(&fsw, &*format!("fsw_{}", ref_frame_parent_index), false); // dbscan_aggregate( // &fsw, // &fsw, @@ -417,14 +178,12 @@ fn denoise_frame_slice_window( // false, // ); - let mut intensity_sorted_indices = frameslice_window - .iter() - .map(|x| x.intensities) - .flat_map(|x| x) - .enumerate() - .map(|(i, x)| (i, *x as u64)) - .collect::>(); - + let mut intensity_sorted_indices = Vec::with_capacity(fsw.num_ndpoints()); + for i in 0..fsw.num_ndpoints() { + // Should I only add the points in the reference frame?? + let intensity = fsw.intensity_at_index(i); + intensity_sorted_indices.push((i, intensity)); + } intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); let mut i_timer = timer.start_sub_timer("dbscan"); @@ -450,24 +209,64 @@ fn denoise_frame_slice_window( false, ); + let ref_frame = &frameslice_window[frameslice_window.len() / 2]; + if ref_frame.slice_window_info.is_none() { + panic!("No slice window info found"); + } + + let slice_info = ref_frame.slice_window_info.as_ref().unwrap(); + let quad_group_id = match slice_info { + MsMsFrameSliceWindowInfo::WindowGroup(x) => *x, + MsMsFrameSliceWindowInfo::SingleWindow(x) => x.global_quad_row_id, + }; + let min_mz = match slice_info { + MsMsFrameSliceWindowInfo::WindowGroup(x) => 0.0, + MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_start, + }; + let max_mz = match slice_info { + MsMsFrameSliceWindowInfo::WindowGroup(x) => 0.0, + MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_end, + }; + + let mut raw_peaks: Vec = centroids + .into_iter() + .map(|x| x.to_timspeak(mz_converter, ims_converter)) + .collect(); + + raw_peaks.retain(|x| x.intensity > min_intensity as u32); + + let mut min_ims = f32::INFINITY; + let mut max_ims = f32::NEG_INFINITY; + + for peak in raw_peaks.iter() { + if peak.mobility < min_ims { + min_ims = peak.mobility; + } + if peak.mobility > max_ims { + max_ims 
= peak.mobility; + } + } + let out = DenseFrameWindow { frame: DenseFrame { - raw_peaks: centroids - .into_iter() - .map(|x| x.to_timspeak(mz_converter, ims_converter)) - .collect(), - index: 0, - rt: 0., + raw_peaks, + index: ref_frame.parent_frame_index, + rt: ref_frame.rt, frame_type: timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), sorted: None, }, - ims_min: 0., - ims_max: 0., - mz_start: 0., - mz_end: 0., - group_id: 0, - quad_group_id: 0, + ims_max: max_ims, + ims_min: min_ims, + mz_start: min_mz as f64, + mz_end: max_mz as f64, + group_id: quad_group_id, + quad_group_id: quad_group_id, }; + maybe_save_json_if_debugging( + &out, + &*format!("dfw_out_{}", ref_frame_parent_index), + saved_first, + ); out } @@ -620,31 +419,70 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> { info!("Denoising {} frames", elems.len()); - let frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); + let mut frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); + + // If profiling and having the "IONMESH_PROFILE_NUM_WINDOWS" env variable set + // then only process the first N slices of windows. + // This is useful for profiling the code. 
+ if let Ok(num_windows) = std::env::var("IONMESH_PROFILE_NUM_WINDOWS") { + let num_windows: usize = num_windows.parse().unwrap(); + log::warn!("Profiling: Only processing {} windows", num_windows); + frame_window_slices.truncate(num_windows); + } + let mut out = Vec::with_capacity(frame_window_slices.len()); let num_windows = frame_window_slices.len(); for (i, sv) in frame_window_slices.iter().enumerate() { info!("Denoising window {}/{}", i + 1, num_windows); + let start_tot_peaks = sv.iter().map(|x| x.num_ndpoints() as u64).sum::(); let progbar = indicatif::ProgressBar::new(sv.len() as u64); - let denoised_elements: Vec = sv - .as_slice() - .par_windows(3) - .progress_with(progbar) - .map(|rt_window_of_slices| { - denoise_frame_slice_window( - rt_window_of_slices, - &self.ims_converter, - &self.mz_converter, - &self.dia_frame_info, - self.min_n, - self.min_intensity, - self.mz_scaling, - self.max_mz_extension, - self.ims_scaling, - self.max_ims_extension, - ) - }) - .collect::>(); + + let lambda_denoise = |x: &[ExpandedFrameSlice]| { + denoise_frame_slice_window( + x, + &self.ims_converter, + &self.mz_converter, + &self.dia_frame_info, + self.min_n, + self.min_intensity, + self.mz_scaling, + self.max_mz_extension, + self.ims_scaling, + self.max_ims_extension, + ) + }; + + let mut denoised_elements: Vec = if cfg!(feature = "less_parallel") { + warn!("Running in less parallel mode"); + sv.into_iter() + .map(|x| ExpandedFrameSlice::from_frame_slice(x)) + .collect::>() + .windows(3) + .map(lambda_denoise) + .collect::>() + } else { + sv.into_par_iter() + .map(|x| ExpandedFrameSlice::from_frame_slice(x)) + .collect::>() + .par_windows(3) + .progress_with(progbar) + .map(lambda_denoise) + .collect::>() + }; + + info!("Denoised {} frames", denoised_elements.len()); + denoised_elements + .retain(|x| x.frame.raw_peaks.iter().map(|y| y.intensity).sum::() > 20); + info!("Retained {} frames", denoised_elements.len()); + let end_tot_peaks = denoised_elements + .iter() + 
.map(|x| x.frame.raw_peaks.len() as u64) + .sum::(); + let ratio = end_tot_peaks as f64 / start_tot_peaks as f64; + info!( + "Start peaks: {}, End peaks: {} -> ratio: {:.2}", + start_tot_peaks, end_tot_peaks, ratio + ); out.push(denoised_elements); } out @@ -697,7 +535,7 @@ pub fn read_all_ms1_denoising( pub fn read_all_dia_denoising( path: String, config: DenoiseConfig, -) -> (Vec, DIAFrameInfo) { +) -> (Vec>, DIAFrameInfo) { let mut timer = utils::ContextTimer::new("Reading all DIA frames", true, utils::LogLevel::INFO); let reader = timsrust::FileReader::new(path.clone()).unwrap(); @@ -729,8 +567,7 @@ pub fn read_all_dia_denoising( let mut timer = utils::ContextTimer::new("Denoising all MS2 frames", true, utils::LogLevel::INFO); let split_frames = denoiser.par_denoise_slice(frames); - let out: Vec = split_frames.into_iter().flatten().collect(); timer.stop(true); - (out, dia_info) + (split_frames, dia_info) } diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 7645ba0..74c8e61 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -209,7 +209,7 @@ impl TraceLike for BaseTrace { } pub fn combine_traces( - denseframe_windows: Vec, + grouped_denseframe_windows: Vec>, config: TracingConfig, rt_binsize: f32, ) -> Vec { @@ -222,43 +222,37 @@ pub fn combine_traces( let mut timer = utils::ContextTimer::new("Tracing peaks in time", true, utils::LogLevel::INFO); - let mut grouped_windows: Vec>>> = Vec::new(); - for dfw in denseframe_windows { - let dia_group = dfw.group_id; - let quad_group = dfw.quad_group_id; - - while grouped_windows.len() <= dia_group { - grouped_windows.push(Vec::new()); - } - - while grouped_windows[dia_group].len() <= quad_group { - grouped_windows[dia_group].push(None); - } - - if grouped_windows[dia_group][quad_group].is_none() { - grouped_windows[dia_group][quad_group] = Some(Vec::new()); - } else { - grouped_windows[dia_group][quad_group] - .as_mut() - .unwrap() - .push(dfw); - } - } - - // Flatten one 
level - let grouped_windows: Vec> = - grouped_windows.into_iter().flatten().flatten().collect(); - - let grouped_windows: Vec> = grouped_windows + let grouped_windows: Vec> = grouped_denseframe_windows .into_iter() .map(_flatten_denseframe_vec) .collect(); // Combine the traces - let out: Vec = grouped_windows + let out: Vec = if cfg!(feature = "less_parallel") { + warn!("Running in single-threaded mode"); + grouped_windows + .into_iter() + .map(|x| { + combine_single_window_traces( + x, + config.mz_scaling.into(), + config.max_mz_expansion_ratio, + config.rt_scaling.into(), + config.max_rt_expansion_ratio, + config.ims_scaling.into(), + config.max_ims_expansion_ratio, + config.min_n.into(), + config.min_neighbor_intensity, + rt_binsize, + ) + }) + .flatten() + .collect() + } else { + grouped_windows .into_par_iter() .map(|x| { - _combine_single_window_traces( + combine_single_window_traces( x, config.mz_scaling.into(), config.max_mz_expansion_ratio, @@ -272,7 +266,8 @@ pub fn combine_traces( ) }) .flatten() - .collect(); + .collect() + }; info!("Total Combined traces: {}", out.len()); timer.stop(true); @@ -471,7 +466,7 @@ impl DistantAtIndex for Vec { type FFTimeTimsPeak = fn(&TimeTimsPeak, &TimeTimsPeak) -> bool; // TODO maybe this can be a builder-> executor pattern -fn _combine_single_window_traces( +fn combine_single_window_traces( prefiltered_peaks: Vec, mz_scaling: f64, max_mz_expansion_ratio: f32, @@ -483,7 +478,7 @@ fn _combine_single_window_traces( min_intensity: u32, rt_binsize: f32, ) -> Vec { - debug!("Prefiltered peaks: {}", prefiltered_peaks.len()); + info!("Peaks in window: {}", prefiltered_peaks.len()); let converter: TimeTimsPeakConverter = TimeTimsPeakConverter { mz_scaling, rt_scaling, diff --git a/src/ms/frames/dense_frame_window.rs b/src/ms/frames/dense_frame_window.rs index ef763e7..db6400a 100644 --- a/src/ms/frames/dense_frame_window.rs +++ b/src/ms/frames/dense_frame_window.rs @@ -1,3 +1,4 @@ +use serde::Serialize; use 
timsrust::{ConvertableIndex, Frame, Scan2ImConverter, Tof2MzConverter}; use crate::ms::{ @@ -16,7 +17,7 @@ fn check_peak_sanity(peak: &TimsPeak) { debug_assert!(peak.npeaks > 0); } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct DenseFrameWindow { pub frame: DenseFrame, pub ims_min: f32, @@ -99,7 +100,7 @@ impl DenseFrame { let num_tofs = index_offset - last_scan_offset; let ims = ims_converter.convert(scan_index as u32) as f32; - expanded_scan_indices.extend(vec![ims; num_tofs]); + expanded_scan_indices.extend(vec![ims; num_tofs as usize]); last_scan_offset = *index_offset; } @@ -153,10 +154,10 @@ impl DenseFrame { info!("frame_window.scan_start: {}", frame_window.scan_start); } debug_assert!(ims >= 0.0); - expanded_scan_indices.extend(vec![ims; num_tofs]); + expanded_scan_indices.extend(vec![ims; num_tofs as usize]); last_scan_offset = *index_offset; } - debug_assert!(last_scan_offset == frame_window.tof_indices.len()); + debug_assert!(last_scan_offset as usize == frame_window.tof_indices.len()); let peaks = expanded_scan_indices .iter() diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 62c38e7..0dd98df 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -1,8 +1,14 @@ +use log::info; +use serde::Serialize; use std::fmt; use timsrust::{Frame, FrameType}; -use crate::space::space_generics::{ - AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, +use crate::{ + space::space_generics::{ + convert_to_bounds_query, AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, + QueriableIndexedPoints, + }, + utils::binary_search_slice, }; use super::FrameMsMsWindowInfo; @@ -89,13 +95,16 @@ impl fmt::Display for ScanOutOfBoundsError { /// Additions for FrameSlice: /// - scan_start 123 // The scan number of the first scan offset in the current window. 
/// - slice_window_info Some(MsMsFrameSliceWindowInfo::SingleWindow(FrameMsMsWindow)) -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct FrameSlice<'a> { - pub scan_offsets: &'a [usize], + // pub scan_offsets: &'a [usize], // Timsrust changed this later ... + pub scan_offsets: &'a [u64], pub tof_indices: &'a [u32], pub intensities: &'a [u32], pub parent_frame_index: usize, pub rt: f64, + + #[serde(skip)] pub frame_type: FrameType, // From this point on they are local implementations @@ -104,6 +113,19 @@ pub struct FrameSlice<'a> { pub slice_window_info: Option, } +#[derive(Debug, Clone, Serialize)] +pub struct ExpandedFrameSlice { + pub scan_numbers: Vec, + pub tof_indices: Vec, + pub intensities: Vec, + pub parent_frame_index: usize, + pub rt: f64, + pub slice_window_info: Option, + + #[serde(skip)] + pub frame_type: FrameType, +} + impl<'a> FrameSlice<'a> { pub fn slice_frame( frame: &'a Frame, @@ -113,8 +135,8 @@ impl<'a> FrameSlice<'a> { ) -> FrameSlice<'a> { let scan_offsets = &frame.scan_offsets[scan_start..=scan_end]; - let indprt_start = scan_offsets[0]; - let indptr_end = *scan_offsets.last().expect("Scan range is empty"); + let indprt_start = scan_offsets[0] as usize; + let indptr_end = *scan_offsets.last().expect("Scan range is empty") as usize; let tof_indices = &frame.tof_indices[indprt_start..indptr_end]; let intensities = &frame.intensities[indprt_start..indptr_end]; @@ -122,9 +144,16 @@ impl<'a> FrameSlice<'a> { debug_assert!(indptr_end - indprt_start == tof_indices.len()); #[cfg(debug_assertions)] { + let init_offset = scan_offsets[0]; for i in 1..(scan_offsets.len() - 1) { debug_assert!(scan_offsets[i] <= scan_offsets[i + 1]); - debug_assert!((scan_offsets[i + 1] - scan_start) <= tof_indices.len()); + debug_assert!( + (scan_offsets[i + 1] - init_offset) <= tof_indices.len() as u64, + "scan_offsets[i+1]: {}, init_offset: {}, tof_indices.len(): {}", + scan_offsets[i + 1], + init_offset, + tof_indices.len() + ); } } @@ -151,7 
+180,7 @@ impl<'a> FrameSlice<'a> { local_index: usize, ) -> usize { debug_assert!(local_index < self.tof_indices.len()); - let search_val = self.scan_offsets[0] + local_index; + let search_val = self.scan_offsets[0] + local_index as u64; let loc = self .scan_offsets .binary_search_by(|x| x.partial_cmp(&search_val).unwrap()); @@ -174,7 +203,7 @@ impl<'a> FrameSlice<'a> { for (scan_index, index_offsets) in self.scan_offsets.windows(2).enumerate() { let num_tofs = index_offsets[1] - index_offsets[0]; - scan_numbers.extend(vec![curr_scan + scan_index; num_tofs]); + scan_numbers.extend(vec![curr_scan + scan_index; num_tofs as usize]); } if cfg!(debug_assertions) { @@ -250,8 +279,8 @@ impl<'a> FrameSlice<'a> { scan_index: usize, ) -> ((&[u32], &[u32]), usize) { let offset_offset = self.scan_offsets[0]; - let scan_start = self.scan_offsets[scan_index] - offset_offset; - let scan_end = self.scan_offsets[scan_index + 1] - offset_offset; + let scan_start = (self.scan_offsets[scan_index] - offset_offset) as usize; + let scan_end = (self.scan_offsets[scan_index + 1] - offset_offset) as usize; let tof_indices = &self.tof_indices[scan_start..scan_end]; let intensities = &self.intensities[scan_start..scan_end]; ((tof_indices, intensities), scan_start) @@ -366,6 +395,47 @@ impl<'a> FrameSlice<'a> { } } +impl ExpandedFrameSlice { + pub fn from_frame_slice(frame_slice: &FrameSlice) -> ExpandedFrameSlice { + let parent_frame_index = frame_slice.parent_frame_index; + let rt = frame_slice.rt; + let slice_window_info = frame_slice.slice_window_info.clone(); + let frame_type = frame_slice.frame_type; + let scan_numbers = frame_slice.explode_scan_numbers(); + + // Sort all arrays on the tof indices. 
+ let mut zipped = frame_slice + .tof_indices + .iter() + .zip(frame_slice.intensities.iter()) + .zip(scan_numbers.iter()) + .collect::>(); + + zipped.sort_unstable_by(|a, b| a.0 .0.cmp(&b.0 .0)); + + let (tof_indices, intensities, scan_numbers) = zipped.into_iter().fold( + (Vec::new(), Vec::new(), Vec::new()), + |(mut tof_indices, mut intensities, mut scan_numbers), + ((tof_index, intensity), scan_number)| { + tof_indices.push(*tof_index); + intensities.push(*intensity); + scan_numbers.push(*scan_number); + (tof_indices, intensities, scan_numbers) + }, + ); + + ExpandedFrameSlice { + scan_numbers, + tof_indices, + intensities, + parent_frame_index, + rt, + slice_window_info, + frame_type, + } + } +} + // Tests for the FrameSlice #[cfg(test)] mod tests { @@ -393,7 +463,7 @@ mod tests { assert_eq!(frame_slice.parent_frame_index, 0); assert_eq!(frame_slice.rt, 65.34); assert_eq!(frame_slice.frame_type, FrameType::MS1); - assert_eq!(frame_slice.scan_start, 0); + assert_eq!(frame_slice.scan_start, 3); } #[test] @@ -558,6 +628,25 @@ mod tests { } } +impl IntenseAtIndex for ExpandedFrameSlice { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self.intensities[index] as u64 + } + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + self.intensities[index] as u64 + } + + fn intensity_index_length(&self) -> usize { + self.intensities.len() + } +} + impl<'a> IntenseAtIndex for FrameSlice<'a> { fn intensity_at_index( &self, @@ -594,6 +683,24 @@ impl<'a> IntenseAtIndex for FrameSlice<'a> { // } } +impl AsNDPointsAtIndex<2> for ExpandedFrameSlice { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<2> { + let scan_index = self.scan_numbers[index]; + let tof_index = self.tof_indices[index]; + + NDPoint { + values: [tof_index as f32, scan_index as f32], + } + } + + fn num_ndpoints(&self) -> usize { + self.intensities.len() + } +} + impl<'a> AsNDPointsAtIndex<2> for FrameSlice<'a> { fn get_ndpoint( &self, @@ -612,6 +719,47 @@ impl<'a> 
AsNDPointsAtIndex<2> for FrameSlice<'a> { } } +impl QueriableIndexedPoints<'_, 2> for ExpandedFrameSlice { + fn query_ndpoint( + &self, + point: &NDPoint<2>, + ) -> Vec { + let query = convert_to_bounds_query(point); + self.query_ndrange(&query.0, query.1) + } + + fn query_ndrange( + &self, + boundary: &NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec { + // TODO implement passing information on the mz tolerance ... + // info!("Querying frame slice with boundary: {:?}", boundary); + // let tol = AbsoluteFramePointTolerance { + // tof_index_tolerance: (boundary.widths[0] / 2.) as u32, + // scan_tolerance: (boundary.widths[1] / 2.) as usize, + // }; + const SCAN_NUMBER_TOLERANCE: usize = 10; + let scan_left = (boundary.starts[1] as usize).saturating_sub(SCAN_NUMBER_TOLERANCE); + let scan_right = (boundary.ends[1] as usize).saturating_add(SCAN_NUMBER_TOLERANCE); + + let (left, right) = binary_search_slice( + &self.tof_indices, + |a, b| a.cmp(b), + boundary.starts[0] as u32, + boundary.ends[0] as u32, + ); + let mut out = Vec::new(); + for i in left..right { + let scan_i = self.scan_numbers[i]; + if scan_i >= scan_left && scan_i <= scan_right { + out.push(i); + } + } + out + } +} + impl<'a> QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { fn query_ndpoint( &'a self, @@ -623,8 +771,8 @@ impl<'a> QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { tof_index, ScanNumberType::Global(scan_index), &AbsoluteFramePointTolerance { - tof_index_tolerance: 1, - scan_tolerance: 1, + tof_index_tolerance: 2, + scan_tolerance: 5, }, ); @@ -644,9 +792,15 @@ impl<'a> QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { boundary: &NDBoundary<2>, reference_point: Option<&NDPoint<2>>, ) -> Vec { + // TODO implement passing information on the mz tolerance ... + // info!("Querying frame slice with boundary: {:?}", boundary); + // let tol = AbsoluteFramePointTolerance { + // tof_index_tolerance: (boundary.widths[0] / 2.) as u32, + // scan_tolerance: (boundary.widths[1] / 2.) 
as usize, + // }; let tol = AbsoluteFramePointTolerance { - tof_index_tolerance: (boundary.widths[0] / 2.) as u32, - scan_tolerance: (boundary.widths[1] / 2.) as usize, + tof_index_tolerance: (boundary.widths[0] * 2.) as u32, + scan_tolerance: (boundary.widths[1] * 10.) as usize, }; let rangesets = self.matching_rangeset( boundary.centers[0] as i32, @@ -759,7 +913,7 @@ impl RangeSet { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub enum MsMsFrameSliceWindowInfo { WindowGroup(usize), SingleWindow(FrameMsMsWindowInfo), diff --git a/src/ms/frames/frame_slice_rt_window.rs b/src/ms/frames/frame_slice_rt_window.rs new file mode 100644 index 0000000..73cffb0 --- /dev/null +++ b/src/ms/frames/frame_slice_rt_window.rs @@ -0,0 +1,278 @@ +use log::trace; +use serde::Serialize; +use timsrust::ConvertableIndex; + +use crate::{ + aggregation::aggregators::ClusterAggregator, + space::space_generics::{ + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDPoint, + QueriableIndexedPoints, + }, +}; + +use super::{ExpandedFrameSlice, FrameSlice, TimsPeak}; + +#[derive(Debug, Serialize)] +pub struct FrameSliceWindow<'a> { + pub window: &'a [ExpandedFrameSlice], + pub reference_index: usize, + pub cum_lengths: Vec, +} + +#[derive(Debug, Clone, Copy)] +pub struct MaybeIntenseRawPeak { + pub intensity: u32, + pub tof_index: u32, + pub scan_index: usize, + pub weight_only: bool, +} + +impl FrameSliceWindow<'_> { + pub fn new<'a>(window: &'a [ExpandedFrameSlice]) -> FrameSliceWindow<'a> { + let cum_lengths = window + .iter() + .map(|x| x.num_ndpoints()) + .scan(0, |acc, x| { + *acc += x; + Some(*acc) + }) + .collect(); + trace!("Cumulative lengths: {:?}", cum_lengths); + FrameSliceWindow { + window, + reference_index: window.len() / 2, + cum_lengths, + } + } + fn get_window_index( + &self, + index: usize, + ) -> (usize, usize) { + let mut pos = 0; + let mut last_cum_length = 0; + for (i, cum_length) in self.cum_lengths.iter().enumerate() { + if 
index < *cum_length { + pos = i; + break; + } + last_cum_length = *cum_length; + } + + debug_assert!( + index < self.cum_lengths.last().unwrap().clone(), + "Index out of bounds, generated index: {}, pos: {}, cum_lengths: {:?}", + index, + pos, + self.cum_lengths + ); + let within_window_index = index - last_cum_length; + + if cfg!(debug_assertions) { + assert!(self.window[pos].intensities.len() > within_window_index, + "Index out of bounds, generated index: {}, within_window_index: {}, pos: {}, cum_lengths: {:?}", + index, within_window_index, pos, self.cum_lengths, + ); + } + + (pos, within_window_index) + } +} + +impl AsAggregableAtIndex for FrameSliceWindow<'_> { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> MaybeIntenseRawPeak { + let (pos, within_window_index) = self.get_window_index(index); + let tmp = &self.window[pos]; + let tof = tmp.tof_indices[within_window_index]; + let int = tmp.intensities[within_window_index]; + let scan = tmp.scan_numbers[within_window_index]; + let foo = MaybeIntenseRawPeak { + intensity: int, + tof_index: tof, + scan_index: scan, + weight_only: pos != self.reference_index, + }; + foo + } + + fn num_aggregable(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } +} + +impl IntenseAtIndex for FrameSliceWindow<'_> { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + if pos == self.reference_index { + self.window[self.reference_index].intensity_at_index(within_window_index) + } else { + 0 + } + } + + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + let (pos, within_window_index) = self.get_window_index(index); + self.window[pos].weight_at_index(within_window_index) + } + + fn intensity_index_length(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } +} + +impl<'a> QueriableIndexedPoints<'a, 2> for FrameSliceWindow<'a> { + fn query_ndpoint( + &'a self, + point: &NDPoint<2>, + ) -> Vec { + let mut out = 
Vec::new(); + let mut last_cum_length = 0; + for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() + { + let local_outs = frame.query_ndpoint(point); + for ii in local_outs { + out.push(ii + last_cum_length); + } + last_cum_length = *cum_length; + } + out + } + + fn query_ndrange( + &'a self, + boundary: &crate::space::space_generics::NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec { + let mut out = Vec::new(); + let last = self.cum_lengths.last().unwrap().clone(); + let mut last_cum_length = 0; + for (frame, cum_length) in self.window.iter().zip(self.cum_lengths.iter()) { + let local_outs = frame.query_ndrange(boundary, reference_point); + for ii in local_outs { + let pi = ii + last_cum_length; + debug_assert!(pi < last, "Index out of bounds: {}, last: {}", pi, last); + out.push(pi); + } + last_cum_length = *cum_length; + } + + out + } +} + +impl DistantAtIndex for FrameSliceWindow<'_> { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + let (pos, within_window_index) = self.get_window_index(index); + let (pos_other, within_window_index_other) = self.get_window_index(other); + panic!("unimplemented"); + 0. 
+ } +} + +impl AsNDPointsAtIndex<2> for FrameSliceWindow<'_> { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<2> { + let (pos, within_window_index) = self.get_window_index(index); + self.window[pos].get_ndpoint(within_window_index) + } + + fn num_ndpoints(&self) -> usize { + self.cum_lengths.last().unwrap().clone() + } +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct RawWeightedTimsPeakAggregator { + pub cumulative_weighted_cluster_tof: u64, + pub cumulative_weighted_cluster_scan: u64, + pub cumulative_cluster_weight: u64, + pub cumulative_cluster_intensity: u64, + pub num_peaks: u64, + pub num_intense_peaks: u64, +} + +#[derive(Debug, Clone, Copy)] +pub struct RawScaleTimsPeak { + pub intensity: f64, + pub tof_index: f64, + pub scan_index: f64, + pub npeaks: u64, +} + +impl RawScaleTimsPeak { + pub fn to_timspeak( + &self, + mz_converter: &timsrust::Tof2MzConverter, + ims_converter: &timsrust::Scan2ImConverter, + ) -> TimsPeak { + TimsPeak { + intensity: self.intensity as u32, + mz: mz_converter.convert(self.tof_index), + mobility: ims_converter.convert(self.scan_index) as f32, + npeaks: self.npeaks as u32, + } + } +} + +impl ClusterAggregator for RawWeightedTimsPeakAggregator { + // Calculate the weight-weighted average of the cluster + // for mz and ims. The intensity is kept as is. 
+ fn add( + &mut self, + elem: &MaybeIntenseRawPeak, + ) { + self.cumulative_cluster_intensity += + if elem.weight_only { 0 } else { elem.intensity } as u64; + self.cumulative_cluster_weight += elem.intensity as u64; + self.cumulative_weighted_cluster_tof += elem.tof_index as u64 * elem.intensity as u64; + self.cumulative_weighted_cluster_scan += elem.scan_index as u64 * elem.intensity as u64; + self.num_peaks += 1; + if !elem.weight_only { + self.num_intense_peaks += 1; + }; + } + + fn aggregate(&self) -> RawScaleTimsPeak { + // Use raw + RawScaleTimsPeak { + intensity: self.cumulative_cluster_intensity as f64, + tof_index: self.cumulative_weighted_cluster_tof as f64 + / self.cumulative_cluster_weight as f64, + scan_index: self.cumulative_weighted_cluster_scan as f64 + / self.cumulative_cluster_weight as f64, + npeaks: self.num_intense_peaks, + } + } + + fn combine( + self, + other: Self, + ) -> Self { + Self { + cumulative_weighted_cluster_tof: self.cumulative_weighted_cluster_tof + + other.cumulative_weighted_cluster_tof, + cumulative_weighted_cluster_scan: self.cumulative_weighted_cluster_scan + + other.cumulative_weighted_cluster_scan, + cumulative_cluster_weight: self.cumulative_cluster_weight + + other.cumulative_cluster_weight, + cumulative_cluster_intensity: self.cumulative_cluster_intensity + + other.cumulative_cluster_intensity, + num_peaks: self.num_peaks + other.num_peaks, + num_intense_peaks: self.num_intense_peaks + other.num_intense_peaks, + } + } +} diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs index 5e011bc..0bb455d 100644 --- a/src/ms/frames/frames.rs +++ b/src/ms/frames/frames.rs @@ -1,3 +1,4 @@ +use serde::Serialize; pub use timsrust::Frame; pub use timsrust::FrameType; pub use timsrust::{ @@ -6,7 +7,7 @@ pub use timsrust::{ use crate::space::space_generics::HasIntensity; -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize)] pub struct TimsPeak { pub intensity: u32, pub mz: f64, @@ -48,17 +49,22 @@ impl<'a> 
HasIntensity for RawTimsPeakReference<'a> { #[derive(Debug, Clone, Copy)] pub enum SortingOrder { + None, Mz, Mobility, Intensity, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct DenseFrame { pub raw_peaks: Vec, pub index: usize, pub rt: f64, + + #[serde(skip_serializing)] pub frame_type: FrameType, + + #[serde(skip_serializing)] pub sorted: Option, } @@ -66,7 +72,7 @@ pub struct DenseFrame { /// /// This adds to a frame slice the context of the what isolation was used /// to generate the frame slice. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub struct FrameMsMsWindowInfo { pub mz_start: f32, pub mz_end: f32, diff --git a/src/ms/frames/mod.rs b/src/ms/frames/mod.rs index 11e84b4..13c348b 100644 --- a/src/ms/frames/mod.rs +++ b/src/ms/frames/mod.rs @@ -1,6 +1,7 @@ pub mod dense_frame_window; pub mod frame_slice; +pub mod frame_slice_rt_window; pub mod frames; pub use dense_frame_window::{Converters, DenseFrameWindow}; -pub use frame_slice::{FrameSlice, MsMsFrameSliceWindowInfo}; +pub use frame_slice::{ExpandedFrameSlice, FrameSlice, MsMsFrameSliceWindowInfo}; pub use frames::{DenseFrame, FrameMsMsWindowInfo, TimsPeak}; diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index 1cb4a5e..a08beb5 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -265,6 +265,7 @@ impl DIAFrameInfo { for group in out.iter_mut() { group.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); } + out.retain(|x| !x.is_empty()); // Debug assert that the frames are sorted by rt if cfg!(debug_assertions) { diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index f02d1d5..d506d5b 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -161,6 +161,17 @@ pub trait IntenseAtIndex { .map(|i| (i, self.intensity_at_index(i))) .collect(); indices.par_sort_unstable_by_key(|&x| x.1); + + debug_assert!(indices.len() == self.intensity_index_length()); + if cfg!(debug_assertions) { + if indices.len() > 1 { + for i in 1..indices.len() { 
+ if indices[i - 1].1 > indices[i].1 { + panic!("Indices are not sorted"); + } + } + } + } indices } } diff --git a/src/utils.rs b/src/utils.rs index 5f4097a..d25f6d5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,9 @@ -use log::{debug, info, trace}; +use log::{debug, info, trace, warn}; use num::cast::AsPrimitive; -use std::time::{Duration, Instant}; +use std::{ + cmp::Ordering, + time::{Duration, Instant}, +}; pub struct ContextTimer { start: Instant, @@ -394,6 +397,96 @@ pub fn get_stats(data: &[f64]) -> Stats { } } +/// This has been shamelessly copied from sage. +/// https://github.com/lazear/sage/blob/93a9a8a7c9f717238fc6c582c0dd501a56159be7/crates/sage/src/database.rs#L498 +/// Althought it really feels like this should be in the standard lib. +/// +/// Usage: +/// ```rust +/// let data = [1.0, 1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0, 3.5, 4.0]; +/// let (left, right) = binary_search_slice(&data, |a: &f64, b| a.total_cmp(b), 1.5, 3.25); +/// assert!(data[left] <= 1.5); +/// assert!(data[right] > 3.25); +/// assert_eq!( +/// &data[left..right], +/// &[1.0, 1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0] +/// ); +/// ``` +/// +#[inline] +pub fn binary_search_slice( + slice: &[T], + key: F, + low: S, + high: S, +) -> (usize, usize) +where + F: Fn(&T, &S) -> Ordering, +{ + let left_idx = match slice.binary_search_by(|a| key(a, &low)) { + Ok(idx) | Err(idx) => { + let mut idx = idx.saturating_sub(1); + while idx > 0 && key(&slice[idx], &low) != Ordering::Less { + idx -= 1; + } + idx + }, + }; + + let right_idx = match slice[left_idx..].binary_search_by(|a| key(a, &high)) { + Ok(idx) | Err(idx) => { + let mut idx = idx + left_idx; + while idx < slice.len() && key(&slice[idx], &high) != Ordering::Greater { + idx = idx.saturating_add(1); + } + idx.min(slice.len()) + }, + }; + (left_idx, right_idx) +} + +/// Serializes to json the object if debug assertions are +/// enabled and an env variable with the frequency is set. 
+/// the env variable should be named `IONMESH_DEBUG_JSON_FREQUENCY` +/// Also derive the bath to ave to from the env variable `IONMESH_DEBUG_JSON_PATH` +/// which is created if it does not exist. +/// The object is serialized to a file named `{name}.json` +pub fn maybe_save_json_if_debugging( + obj: &T, + name: &str, + force: bool, +) -> bool +where + T: serde::Serialize, +{ + if cfg!(debug_assertions) { + let freq = std::env::var("IONMESH_DEBUG_JSON_FREQUENCY"); + if let Ok(freq) = freq { + let freq = freq.parse::().unwrap(); + if force || (freq > 0) { + if force || (rand::random::() % freq == 0) { + let json = serde_json::to_string_pretty(obj).unwrap(); + let path = std::env::var("IONMESH_DEBUG_JSON_PATH"); + let path = if let Ok(path) = path { + if !std::path::Path::new(&path).exists() { + std::fs::create_dir_all(&path).unwrap(); + } + std::path::Path::new(&path).join(format!("{}.json", name)) + } else { + warn!("IONMESH_DEBUG_JSON_PATH not set, saving to current directory"); + std::path::Path::new(".").join(format!("{}.json", name)) + }; + info!("Saving json to {:?}", path); + + std::fs::write(path, json).unwrap(); + return true; + } + } + } + } + false +} + #[cfg(test)] mod test_rolling_sd { use super::*; From ec2f3c50de6b77edf63c004dc7b69c0a34f676e1 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Sun, 21 Jul 2024 21:37:07 -0700 Subject: [PATCH 20/26] (feat) changed indexing instrategy for traces and added less paralel mode for profiling --- README.md | 13 + src/aggregation/aggregators.rs | 50 ++-- src/aggregation/chromatograms.rs | 7 +- src/aggregation/dbscan/dbscan.rs | 4 +- src/aggregation/dbscan/runner.rs | 372 +++++++++++++++---------- src/aggregation/ms_denoise.rs | 44 +-- src/aggregation/tracing.rs | 364 +++++++++++++++++++++--- src/main.rs | 2 +- src/ms/frames/frame_slice.rs | 8 +- src/ms/frames/frame_slice_rt_window.rs | 6 +- src/space/kdtree.rs | 6 +- src/space/quad.rs | 6 +- src/space/space_generics.rs | 6 +- src/utils.rs | 51 +++- 14 files changed, 694 insertions(+), 245 deletions(-) diff --git a/README.md b/README.md index 302d112..5f0561c 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,19 @@ IONMESH_PROFILE_NUM_WINDOWS # windows. ``` +### Profiling + +Mostly notes to self but someone else might benefit ... + +``` +# Making a cool flamegraph of a run +sudo RUST_LOG=debug CARGO_PROFILE_RELEASE_DEBUG=true IONMESH_PROFILE_NUM_WINDOWS=2 \ + cargo flamegraph \ + --output flamegraph_secondversion.svg \ + --features less_parallel \ + -- --config ./benchmark/default_ionmesh_config.toml benchmark/${MYFAVORITEFILE}.d -o tmp/ +``` + ## Roadmap 1. Use aggregation metrics to re-score sage search. 
diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index ce23c0b..6d4bdec 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -181,33 +181,35 @@ fn parallel_aggregate_clusters< let mut cluster_vecs = out2.into_iter().flatten().collect::>(); - let unclustered_elems: Vec = cluster_labels - .iter() - .enumerate() - .filter(|(_, x)| match x { - ClusterLabel::Unassigned => true, - ClusterLabel::Noise => keep_unclustered, - _ => false, - }) - .map(|(i, _elem)| i) - .collect(); + if keep_unclustered { + let unclustered_elems: Vec = cluster_labels + .iter() + .enumerate() + .filter(|(_, x)| match x { + ClusterLabel::Unassigned => true, // Should there be any unassigned? + ClusterLabel::Noise => true, + ClusterLabel::Cluster(_) => false, + }) + .map(|(i, _elem)| i) + .collect(); - // if unclustered_elems.len() > 0 { - // log::debug!("Total Orig elems: {}", cluster_labels.len()); - // log::debug!("Unclustered elems: {}", unclustered_elems.len()); - // log::debug!("Clustered elems: {}", cluster_vecs.len()); - // } + // if unclustered_elems.len() > 0 { + // log::debug!("Total Orig elems: {}", cluster_labels.len()); + // log::debug!("Unclustered elems: {}", unclustered_elems.len()); + // log::debug!("Clustered elems: {}", cluster_vecs.len()); + // } - let unclustered_elems = unclustered_elems - .iter() - .map(|i| { - let mut oe = def_aggregator(); - oe.add(&elements.get_aggregable_at_index(*i)); - oe - }) - .collect::>(); + let unclustered_elems = unclustered_elems + .iter() + .map(|i| { + let mut oe = def_aggregator(); + oe.add(&elements.get_aggregable_at_index(*i)); + oe + }) + .collect::>(); - cluster_vecs.extend(unclustered_elems); + cluster_vecs.extend(unclustered_elems); + } timer.stop(true); cluster_vecs diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index 17e976b..37c7b2d 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -254,7 +254,12 @@ 
impl< let mut mag_a = T::default(); let mut mag_b = T::default(); for i in 0..NBINS { - let other_index = i + other_vs_self_offset as usize; + let other_index = i as i32 + other_vs_self_offset; + if other_index < 0 { + continue; + } + + let other_index = other_index as usize; if other_index >= other.chromatogram.len() { continue; } diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index 7d78f4c..ac4408d 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -20,7 +20,7 @@ fn reassign_centroid< const N: usize, T: Send + Clone + Copy, C: NDPointConverter, - I: QueriableIndexedPoints<'a, N> + std::marker::Sync, + I: QueriableIndexedPoints + std::marker::Sync, G: Sync + Send + ClusterAggregator, R: Send, RE: Send + Sync + AsAggregableAtIndex + ?Sized, @@ -167,7 +167,7 @@ pub fn dbscan_aggregate< + Sync + std::fmt::Debug + ?Sized, - IND: QueriableIndexedPoints<'a, N> + std::marker::Sync + Send + std::fmt::Debug, + IND: QueriableIndexedPoints + std::marker::Sync + Send + std::fmt::Debug, NAI: AsNDPointsAtIndex + std::marker::Sync + Send, T: HasIntensity + Send + Clone + Copy + Sync, D: Send + Sync, diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 007ec8f..f161d91 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,13 +1,14 @@ use crate::space::space_generics::{ convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, - NDPoint, QueriableIndexedPoints, + NDBoundary, NDPoint, QueriableIndexedPoints, }; use crate::space::space_generics::{AsAggregableAtIndex, NDPointConverter}; -use std::marker::PhantomData; - use crate::utils; +use core::fmt::Debug; use indicatif::ProgressIterator; -use log::trace; +use log::{debug, trace}; +use std::marker::PhantomData; +use std::sync::Arc; use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; @@ -105,6 +106,7 @@ impl 
ClusterLabels { struct DBScanTimers { main: utils::ContextTimer, + // TODO aux timers can probably be a hashmap filter_fun_cache_timer: utils::ContextTimer, outer_loop_nn_timer: utils::ContextTimer, inner_loop_nn_timer: utils::ContextTimer, @@ -209,20 +211,45 @@ impl DBSCANRunnerState { } } -struct DBSCANRunner<'a, const N: usize, D> { +struct DBSCANRunner<'a, const N: usize, D, FF> +where + FF: Fn(&D) -> bool + Send + Sync + ?Sized, + D: Send + Sync, +{ min_n: usize, min_intensity: u64, - filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, + filter_fun: Option<&'a FF>, progress: bool, max_extension_distances: &'a [f32; N], + _phantom: PhantomData, } +impl<'a, const N: usize, D, FF> Debug for DBSCANRunner<'a, N, D, FF> +where + FF: Fn(&D) -> bool + Send + Sync + ?Sized, + D: Send + Sync, +{ + fn fmt( + &self, + f: &mut std::fmt::Formatter<'_>, + ) -> std::fmt::Result { + f.debug_struct("DBSCANRunner") + .field("min_n", &self.min_n) + .field("min_intensity", &self.min_intensity) + .field("filter_fun", &"Some bool>???") + .field("progress", &self.progress) + .field("max_extension_distances", &self.max_extension_distances) + .finish() + } +} + +#[derive(Clone)] struct DBSCANPoints<'a, const N: usize, PP, PE, DAI, E, QIP> where - PP: IntenseAtIndex + std::marker::Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, + PP: IntenseAtIndex + Send + Sync + ?Sized, + PE: AsNDPointsAtIndex + Send + Sync + ?Sized, DAI: DistantAtIndex + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + QIP: QueriableIndexedPoints + Sync, { raw_elements: &'a PP, // &'a Vec, intensity_sorted_indices: Vec<(usize, u64)>, @@ -232,54 +259,117 @@ where _phantom_metric: PhantomData, } -impl<'a, const N: usize, PP, QQ, D, E, QIP> DBSCANPoints<'a, N, PP, QQ, D, E, QIP> +impl<'a, const N: usize, PP, QQ, DAI, E, QIP> QueriableIndexedPoints + for DBSCANPoints<'a, N, PP, QQ, DAI, E, QIP> +where + PP: IntenseAtIndex + Send + Sync + ?Sized, + QQ: AsNDPointsAtIndex + Send + Sync + ?Sized, 
+ DAI: DistantAtIndex + ?Sized, + QIP: QueriableIndexedPoints + Sync, +{ + fn query_ndpoint( + &self, + point: &NDPoint, + ) -> Vec { + self.indexed_points.query_ndpoint(point) + } + + fn query_ndrange( + &self, + boundary: &NDBoundary, + reference_point: Option<&NDPoint>, + ) -> Vec { + self.indexed_points.query_ndrange(boundary, reference_point) + } +} + +impl<'a, const N: usize, PP, QQ, DAI, E, QIP> DistantAtIndex + for DBSCANPoints<'a, N, PP, QQ, DAI, E, QIP> where - PP: IntenseAtIndex + std::marker::Send + ?Sized, - QQ: AsNDPointsAtIndex + ?Sized, + PP: IntenseAtIndex + Sync + Send + ?Sized, + QQ: AsNDPointsAtIndex + Send + Sync + ?Sized, + DAI: DistantAtIndex + ?Sized, + QIP: QueriableIndexedPoints + std::marker::Sync, +{ + fn distance_at_indices( + &self, + a: usize, + b: usize, + ) -> E { + self.raw_dist.distance_at_indices(a, b) + } +} + +impl<'a, const N: usize, PP, QQ, D, E, QIP> IntenseAtIndex + for DBSCANPoints<'a, N, PP, QQ, D, E, QIP> +where + PP: IntenseAtIndex + std::marker::Send + Sync + ?Sized, + QQ: AsNDPointsAtIndex + Send + Sync + ?Sized, D: DistantAtIndex + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + QIP: QueriableIndexedPoints + std::marker::Sync, { - fn get_intensity_at_index( + fn intensity_at_index( &self, index: usize, ) -> u64 { self.raw_elements.intensity_at_index(index) } - fn get_ndpoint_at_index( + fn weight_at_index( + &self, + index: usize, + ) -> u64 { + self.raw_elements.weight_at_index(index) + } + + fn intensity_index_length(&self) -> usize { + self.raw_elements.intensity_index_length() + } +} + +impl<'a, const N: usize, PP, QQ, D, E, QIP> AsNDPointsAtIndex + for DBSCANPoints<'a, N, PP, QQ, D, E, QIP> +where + PP: IntenseAtIndex + std::marker::Send + Sync + ?Sized, + QQ: AsNDPointsAtIndex + Send + Sync + ?Sized, + D: DistantAtIndex + ?Sized, + QIP: QueriableIndexedPoints + std::marker::Sync, +{ + fn get_ndpoint( &self, index: usize, ) -> NDPoint { self.projected_elements.get_ndpoint(index) } - fn 
get_distance_at_indices( - &self, - a: usize, - b: usize, - ) -> E { - self.raw_dist.distance_at_indices(a, b) + fn num_ndpoints(&self) -> usize { + self.projected_elements.num_ndpoints() } } -impl<'a, 'b: 'a, const N: usize, D> DBSCANRunner<'a, N, D> +impl<'c, 'b: 'c, 'a: 'b, const N: usize, D, FF> DBSCANRunner<'b, N, D, FF> where - D: Sync, + D: Sync + Send + 'a, + FF: Fn(&D) -> bool + Send + Sync + 'a + ?Sized, { fn run( &self, - raw_elements: &'b PP, // Vec, // trait impl Index + raw_elements: &'a PP, // Vec, // trait impl Index intensity_sorted_indices: Vec<(usize, u64)>, - indexed_points: &'b QIP, - projected_elements: &'b PE, //[NDPoint], // trait impl AsNDPointAtIndex> - raw_distance_calculator: &'b DAI, + indexed_points: &'a QIP, + projected_elements: &'a PE, //[NDPoint], // trait impl AsNDPointAtIndex> + raw_distance_calculator: &'a DAI, ) -> ClusterLabels where PP: IntenseAtIndex + Send + Sync + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, + PE: AsNDPointsAtIndex + Send + Sync + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync + std::fmt::Debug, + QIP: QueriableIndexedPoints + std::marker::Sync + std::fmt::Debug, { + if self.progress { + debug!("Starting DBSCAN"); + debug!("Params: {:?}", self); + } let usize_filterfun = match self.filter_fun { Some(filterfun) => { let cl = |a: &usize, b: &usize| { @@ -306,7 +396,7 @@ where _phantom_metric: PhantomData, }; // Q: if filter fun is required ... why is it an option? 
- state = self.process_points(state, &points); + state = self.process_points(state, Arc::new(points)); state = self.report_timers(state); self.take_cluster_labels(state) @@ -325,19 +415,26 @@ where &self, state: DBSCANRunnerState, ) -> ClusterLabels { + if self.progress { + debug!("Finished DBSCAN"); + debug!( + "Exporting Num clusters: {}", + state.cluster_labels.num_clusters + ); + } state.cluster_labels } fn process_points( &self, mut state: DBSCANRunnerState, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc>, ) -> DBSCANRunnerState where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, + PP: IntenseAtIndex + Send + Sync + ?Sized, + PE: AsNDPointsAtIndex + Sync + Send + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + QIP: QueriableIndexedPoints + std::marker::Sync, { let my_progbar = state.create_progress_bar(points.intensity_sorted_indices.len(), self.progress); @@ -349,7 +446,7 @@ where { self.process_single_point( *point_index, - &points, + Arc::clone(&points), &mut state.cluster_labels, &mut state.filter_fun_cache, &mut state.timers, @@ -361,18 +458,18 @@ where /// This method gets applied to every point in decreasing intensity order. 
fn process_single_point( - &self, + &'b self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc>, cluster_labels: &mut ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, + PP: IntenseAtIndex + Send + Sync + ?Sized, + PE: AsNDPointsAtIndex + Send + Sync + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + QIP: QueriableIndexedPoints + Sync, { if cluster_labels.get(point_index) != ClusterLabel::Unassigned { return; @@ -380,7 +477,7 @@ where let neighbors = self.find_main_loop_neighbors( point_index, - points, + Arc::clone(&points), filter_fun_cache, timers, cc_metrics, @@ -388,7 +485,7 @@ where // trace!("Neighbors: {:?}", neighbors); - if !self.is_core_point(&neighbors, points.raw_elements, timers) { + if !self.is_core_point(&neighbors, Arc::clone(&points), timers) { cluster_labels.set_noise(point_index); return; } @@ -403,43 +500,49 @@ where ); } - fn find_main_loop_neighbors( + fn find_main_loop_neighbors( &self, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, cc_metrics: &mut CandidateCountMetrics, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: AsNDPointsAtIndex + + DistantAtIndex + + QueriableIndexedPoints + + IntenseAtIndex + + Send + + Sync + + ?Sized, { timers.outer_loop_nn_timer.reset_start(); - let binding = points.projected_elements.get_ndpoint(point_index); - let query_elems = convert_to_bounds_query(&binding); - let mut candidate_neighbors = points - .indexed_points - .query_ndrange(&query_elems.0, query_elems.1); + let binding = points.get_ndpoint(point_index); + let mut 
candidate_neighbors = points.query_ndpoint(&binding); + // Every point should have at least itself as a neighbor. + debug_assert!( + !candidate_neighbors.is_empty(), + "No neighbors found, {}, {:?}, at least itself should be a neighbor", + point_index, + binding + ); // trace!("Query elems: {:?}", query_elems); // trace!("Candidate neighbors: {:?}", candidate_neighbors); if cfg!(debug_assertions) { + let max_i = candidate_neighbors.iter().max().unwrap(); // Make sure all generated neighbors are within the bounds. - for i in candidate_neighbors.iter() { - assert!( - *i < points.projected_elements.num_ndpoints(), - "Index: {} out of proj elems bounds", - i - ); - assert!( - *i < points.raw_elements.intensity_index_length(), - "Index: {} out of intensity bounds", - i - ); - } + assert!( + *max_i < points.num_ndpoints(), + "Index: {} out of proj elems bounds", + max_i, + ); + assert!( + *max_i < points.intensity_index_length(), + "Index: {} out of intensity bounds", + max_i + ); } timers.outer_loop_nn_timer.stop(false); @@ -454,9 +557,8 @@ where match res_in_cache { Some(res) => res, None => { - let res = (self.filter_fun.unwrap())( - &points.get_distance_at_indices(*i, point_index), - ); + let res = + (self.filter_fun.unwrap())(&points.distance_at_indices(*i, point_index)); tmp.set(*i, point_index, res); res }, @@ -478,7 +580,7 @@ where fn is_core_point( &self, neighbors: &[usize], - raw_elements: &'a PP, + points: Arc, timers: &mut DBScanTimers, ) -> bool where @@ -487,7 +589,7 @@ where timers.outer_intensity_calculation.reset_start(); let neighbor_intensity_total = neighbors .iter() - .map(|i| raw_elements.intensity_at_index(*i)) + .map(|i| points.intensity_at_index(*i)) .sum::(); timers.outer_intensity_calculation.stop(false); return neighbor_intensity_total >= self.min_intensity; @@ -497,15 +599,15 @@ where &self, apex_point_index: usize, neighbors: Vec, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc>, cluster_labels: &mut ClusterLabels, 
filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, + PP: IntenseAtIndex + Sync + Send + ?Sized, + PE: AsNDPointsAtIndex + Send + Sync + ?Sized, DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + QIP: QueriableIndexedPoints + std::marker::Sync, { cluster_labels.set_new_cluster(apex_point_index); let mut seed_set: Vec = neighbors; @@ -515,12 +617,13 @@ where continue; } - let local_neighbors = self.find_local_neighbors(neighbor_index, points, timers); + let local_neighbors = + self.find_local_neighbors(neighbor_index, Arc::clone(&points), timers); let filtered_neighbors = self.filter_neighbors_inner_loop( local_neighbors, apex_point_index, neighbor_index, - points, + Arc::clone(&points), cluster_labels, filter_fun_cache, timers, @@ -548,113 +651,108 @@ where } } - fn find_local_neighbors( + fn find_local_neighbors( &self, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: AsNDPointsAtIndex + ?Sized + QueriableIndexedPoints + std::marker::Sync + 'a, { timers.inner_loop_nn_timer.reset_start(); - let binding = points.projected_elements.get_ndpoint(neighbor_index); - let inner_query_elems = convert_to_bounds_query(&binding); + let binding = Arc::clone(&points).get_ndpoint(neighbor_index); let local_neighbors: Vec = points - .indexed_points - .query_ndrange(&inner_query_elems.0, inner_query_elems.1) - .iter_mut() + .query_ndpoint(&binding) + .iter() .map(|x| *x) .collect::>(); + // Should I warn if nothing is gotten here? + // every point should have at least itself as a neighbor ... 
+ debug_assert!(!local_neighbors.is_empty()); timers.inner_loop_nn_timer.stop(false); local_neighbors } - fn filter_neighbors_inner_loop( + fn filter_neighbors_inner_loop( &self, local_neighbors: Vec, cluster_apex_point_index: usize, current_center_point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, cluster_labels: &ClusterLabels, filter_fun_cache: &mut Option, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: + IntenseAtIndex + Send + AsNDPointsAtIndex + DistantAtIndex + Send + Sync + ?Sized, { let filtered = self.apply_filter_fun( local_neighbors, cluster_apex_point_index, - points, + Arc::clone(&points), filter_fun_cache, ); - if !self.is_extension_core_point(&filtered, current_center_point_index, points, timers) { + if !self.is_extension_core_point( + &filtered, + current_center_point_index, + Arc::clone(&points), + timers, + ) { return Vec::new(); } let unassigned = self.filter_unassigned(filtered, cluster_labels); - let unassigned_in_global_distance = - self.filter_by_apex_distance(unassigned, cluster_apex_point_index, points, timers); + let unassigned_in_global_distance = self.filter_by_apex_distance( + unassigned, + cluster_apex_point_index, + Arc::clone(&points), + timers, + ); self.filter_by_local_intensity_and_distance( unassigned_in_global_distance, current_center_point_index, - points, + Arc::clone(&points), timers, ) } - fn filter_by_apex_distance( + fn filter_by_apex_distance( &self, mut neighbors: Vec, apex_point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: AsNDPointsAtIndex + ?Sized, 
{ timers.local_neighbor_filter_timer.reset_start(); - let query_point = &points.projected_elements.get_ndpoint(apex_point_index); - neighbors.retain(|&i| { - self.is_within_max_distance(&points.projected_elements.get_ndpoint(i), query_point) - }); + let query_point = &points.get_ndpoint(apex_point_index); + neighbors.retain(|&i| self.is_within_max_distance(&points.get_ndpoint(i), query_point)); timers.local_neighbor_filter_timer.stop(false); neighbors } - fn is_extension_core_point( + fn is_extension_core_point( &self, neighbors: &[usize], current_center_point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, timers: &mut DBScanTimers, ) -> bool where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: IntenseAtIndex + Sync + Send + ?Sized, { timers.inner_intensity_calculation.reset_start(); let mut neighbor_intensity_total: u64 = neighbors .iter() - .map(|&i| points.raw_elements.intensity_at_index(i)) + .map(|&i| points.intensity_at_index(i)) .sum(); - neighbor_intensity_total += points - .raw_elements - .intensity_at_index(current_center_point_index); + neighbor_intensity_total += points.intensity_at_index(current_center_point_index); timers.inner_intensity_calculation.stop(false); neighbors.len() >= self.min_n && neighbor_intensity_total >= self.min_intensity @@ -668,27 +766,23 @@ where /// one could pass a function that checks if the chromatograms a high correlation. /// Because two might share the same point in space, intensity is not really /// relevant but co-elution might be critical. 
- fn apply_filter_fun( + fn apply_filter_fun( &self, local_neighbors: Vec, point_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, filter_fun_cache: &mut Option, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: DistantAtIndex + IntenseAtIndex + Sync + Send + ?Sized, { if let Some(cache) = filter_fun_cache { local_neighbors .into_iter() .filter(|&i| { cache.get(i, point_index).unwrap_or_else(|| { - let res = (self.filter_fun.unwrap())( - &points.get_distance_at_indices(i, point_index), - ); + let res = + (self.filter_fun.unwrap())(&points.distance_at_indices(i, point_index)); cache.set(i, point_index, res); res }) @@ -708,27 +802,23 @@ where neighbors } - fn filter_by_local_intensity_and_distance( + fn filter_by_local_intensity_and_distance( &self, mut neighbors: Vec, neighbor_index: usize, - points: &DBSCANPoints<'a, N, PP, PE, DAI, D, QIP>, + points: Arc, timers: &mut DBScanTimers, ) -> Vec where - PP: IntenseAtIndex + Send + ?Sized, - PE: AsNDPointsAtIndex + ?Sized, - DAI: DistantAtIndex + Send + Sync + ?Sized, - QIP: QueriableIndexedPoints<'a, N> + std::marker::Sync, + PTS: IntenseAtIndex + AsNDPointsAtIndex + Sync + Send + ?Sized, { timers.local_neighbor_filter_timer.reset_start(); - let query_intensity = points.raw_elements.intensity_at_index(neighbor_index); - let query_point = &points.projected_elements.get_ndpoint(neighbor_index); + let query_intensity = points.intensity_at_index(neighbor_index); + let query_point = &points.get_ndpoint(neighbor_index); neighbors.retain(|&i| { - let going_downhill = points.raw_elements.intensity_at_index(i) <= query_intensity; - let within_distance = - self.is_within_max_distance(&points.projected_elements.get_ndpoint(i), query_point); + let going_downhill = points.intensity_at_index(i) <= query_intensity; + let within_distance = 
self.is_within_max_distance(&points.get_ndpoint(i), query_point); going_downhill && within_distance }); @@ -753,10 +843,11 @@ pub fn dbscan_label_clusters< 'a, const N: usize, RE: IntenseAtIndex + DistantAtIndex + Send + Sync + AsAggregableAtIndex + ?Sized, - T: QueriableIndexedPoints<'a, N> + Send + std::marker::Sync + std::fmt::Debug, + T: QueriableIndexedPoints + Send + std::marker::Sync + std::fmt::Debug, PE: AsNDPointsAtIndex + Send + Sync + ?Sized, D: Send + Sync, E: Send + Sync + Copy, + FF: Fn(&D) -> bool + Send + Sync + ?Sized, >( indexed_points: &'a T, raw_elements: &'a RE, @@ -764,7 +855,7 @@ pub fn dbscan_label_clusters< min_n: usize, min_intensity: u64, intensity_sorted_indices: Vec<(usize, u64)>, - filter_fun: Option<&'a (dyn Fn(&D) -> bool + Send + Sync)>, + filter_fun: Option<&'a FF>, progress: bool, max_extension_distances: &'a [f32; N], ) -> ClusterLabels { @@ -774,6 +865,7 @@ pub fn dbscan_label_clusters< progress, filter_fun: filter_fun, max_extension_distances, + _phantom: PhantomData::, }; let cluster_labels = runner.run( diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index adfc542..763d67e 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,3 +1,4 @@ +use core::fmt::Debug; use core::panic; use serde::{Deserialize, Serialize}; @@ -19,7 +20,7 @@ use crate::utils; use crate::utils::maybe_save_json_if_debugging; use indicatif::ParallelProgressIterator; -use log::{info, trace, warn}; +use log::{debug, info, trace, warn}; use rayon::prelude::*; use timsrust::Frame; @@ -163,20 +164,6 @@ fn denoise_frame_slice_window( let ref_frame_parent_index = fsw.window[fsw.reference_index].parent_frame_index; let saved_first = maybe_save_json_if_debugging(&fsw, &*format!("fsw_{}", ref_frame_parent_index), false); - // dbscan_aggregate( - // &fsw, - // &fsw, - // &fsw, - // timer, - // min_n, - // min_intensity, - // TimsPeakAggregator::default, - // None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, - // 
utils::LogLevel::TRACE, - // false, - // &[max_mz_extension as f32, max_ims_extension], - // false, - // ); let mut intensity_sorted_indices = Vec::with_capacity(fsw.num_ndpoints()); for i in 0..fsw.num_ndpoints() { @@ -184,7 +171,16 @@ fn denoise_frame_slice_window( let intensity = fsw.intensity_at_index(i); intensity_sorted_indices.push((i, intensity)); } - intensity_sorted_indices.par_sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + intensity_sorted_indices.sort_unstable_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); + if cfg!(debug_assertions) { + // I know this should be obviously always true, but I dont trust myself + // and thinking about orderings. + let mut last_intensity = u64::MAX; + for (_i, intensity) in intensity_sorted_indices.iter() { + assert!(*intensity <= last_intensity); + last_intensity = *intensity; + } + } let mut i_timer = timer.start_sub_timer("dbscan"); let cluster_labels = dbscan_label_clusters( @@ -329,7 +325,7 @@ where where Self: Sync, { - info!("Denoising {} frames", elems.len()); + debug!("Denoising {} frames", elems.len()); // randomly viz 1/200 frames // Selecting a slice of 1/200 frames @@ -417,7 +413,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> where Self: Sync, { - info!("Denoising {} frames", elems.len()); + info!("Denoising (centroiding) {} frames", elems.len()); let mut frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); @@ -430,6 +426,12 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> frame_window_slices.truncate(num_windows); } + // This warning reders to denoise_frame_slice_window. + // to have them be not hard-coded I need a way to convert + // m/z space ranges to tof indices ... which is not exposed + // by timsrust ... 
+ warn!("Using prototype function for denoising, scalings are hard-coded"); + let mut out = Vec::with_capacity(frame_window_slices.len()); let num_windows = frame_window_slices.len(); for (i, sv) in frame_window_slices.iter().enumerate() { @@ -470,16 +472,16 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> .collect::>() }; - info!("Denoised {} frames", denoised_elements.len()); + debug!("Denoised {} frames", denoised_elements.len()); denoised_elements .retain(|x| x.frame.raw_peaks.iter().map(|y| y.intensity).sum::() > 20); - info!("Retained {} frames", denoised_elements.len()); + debug!("Retained {} frames", denoised_elements.len()); let end_tot_peaks = denoised_elements .iter() .map(|x| x.frame.raw_peaks.len() as u64) .sum::(); let ratio = end_tot_peaks as f64 / start_tot_peaks as f64; - info!( + debug!( "Start peaks: {}, End peaks: {} -> ratio: {:.2}", start_tot_peaks, end_tot_peaks, ratio ); diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 74c8e61..1ab9d68 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,15 +1,17 @@ -use crate::aggregation::aggregators::ClusterAggregator; +use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; use crate::aggregation::chromatograms::{ BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, }; use crate::aggregation::dbscan::dbscan::dbscan_generic; +use crate::aggregation::dbscan::runner::dbscan_label_clusters; use crate::ms::frames::DenseFrameWindow; use crate::space::space_generics::{ - AsAggregableAtIndex, DistantAtIndex, HasIntensity, NDPoint, NDPointConverter, TraceLike, + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, NDPoint, + NDPointConverter, QueriableIndexedPoints, TraceLike, }; use crate::space::space_generics::{IntenseAtIndex, NDBoundary}; use crate::utils; -use crate::utils::RollingSDCalculator; +use crate::utils::{binary_search_slice, RollingSDCalculator}; use core::panic; use log::{debug, error, 
info, warn}; @@ -227,45 +229,34 @@ pub fn combine_traces( .map(_flatten_denseframe_vec) .collect(); + let combine_lambda = |x: Vec| { + combine_single_window_traces2( + x, + config.mz_scaling.into(), + config.max_mz_expansion_ratio, + config.rt_scaling.into(), + config.max_rt_expansion_ratio, + config.ims_scaling.into(), + config.max_ims_expansion_ratio, + config.min_n.into(), + config.min_neighbor_intensity, + rt_binsize, + ) + }; + // Combine the traces let out: Vec = if cfg!(feature = "less_parallel") { warn!("Running in single-threaded mode"); grouped_windows .into_iter() - .map(|x| { - combine_single_window_traces( - x, - config.mz_scaling.into(), - config.max_mz_expansion_ratio, - config.rt_scaling.into(), - config.max_rt_expansion_ratio, - config.ims_scaling.into(), - config.max_ims_expansion_ratio, - config.min_n.into(), - config.min_neighbor_intensity, - rt_binsize, - ) - }) + .map(combine_lambda) .flatten() .collect() } else { grouped_windows - .into_par_iter() - .map(|x| { - combine_single_window_traces( - x, - config.mz_scaling.into(), - config.max_mz_expansion_ratio, - config.rt_scaling.into(), - config.max_rt_expansion_ratio, - config.ims_scaling.into(), - config.max_ims_expansion_ratio, - config.min_n.into(), - config.min_neighbor_intensity, - rt_binsize, - ) - }) - .flatten() + .into_par_iter() + .map(combine_lambda) + .flatten() .collect() }; @@ -465,6 +456,313 @@ impl DistantAtIndex for Vec { // Needed to specify the generic in dbscan_generic type FFTimeTimsPeak = fn(&TimeTimsPeak, &TimeTimsPeak) -> bool; +#[derive(Debug)] +struct TimeTimsPeakScaling { + mz_scaling: f32, + rt_scaling: f32, + ims_scaling: f32, + quad_scaling: f32, +} + +#[derive(Debug)] +struct QueriableTimeTimsPeaks { + peaks: Vec, + min_bucket_mz_vals: Vec, + bucket_size: usize, + scalings: TimeTimsPeakScaling, +} + +impl QueriableTimeTimsPeaks { + fn new( + mut peaks: Vec, + scalings: TimeTimsPeakScaling, + ) -> Self { + const BUCKET_SIZE: usize = 16384; + // // Sort all of our 
theoretical fragments by m/z, from low to high + peaks.par_sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); + + let mut min_bucket_mz_vals = peaks + .par_chunks_mut(BUCKET_SIZE) + .map(|bucket| { + let min = bucket[0].mz; + bucket.par_sort_unstable_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); + min as f32 + }) + .collect::>(); + + // Get the max value of the last bucket + let max_bucket_mz = peaks[peaks.len().saturating_sub(BUCKET_SIZE)..peaks.len()] + .iter() + .max_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()) + .unwrap() + .mz as f32; + min_bucket_mz_vals.push(max_bucket_mz); + + QueriableTimeTimsPeaks { + peaks, + min_bucket_mz_vals, + bucket_size: BUCKET_SIZE, + scalings, + } + } + + fn get_bucket_at( + &self, + index: usize, + ) -> Result<&[TimeTimsPeak], ()> { + let page_start = index * self.bucket_size; + if page_start >= self.peaks.len() { + return Err(()); + } + let page_end = (page_start + self.bucket_size).min(self.peaks.len()); + let tmp = &self.peaks[page_start..page_end]; + + if cfg!(debug_assertions) { + // Make sure all rts are sorted within the bucket + for i in 1..tmp.len() { + if tmp[i - 1].rt > tmp[i].rt { + panic!("RTs are not sorted within the bucket"); + } + } + } + Ok(tmp) + } + + fn get_intensity_sorted_indices(&self) -> Vec<(usize, u64)> { + let mut indices: Vec<(usize, u64)> = (0..self.peaks.len()) + .map(|i| (i, self.peaks[i].intensity)) + .collect(); + indices.par_sort_unstable_by_key(|&x| x.1); + + debug_assert!(indices.len() == self.peaks.len()); + if cfg!(debug_assertions) { + if indices.len() > 1 { + for i in 1..indices.len() { + if indices[i - 1].1 > indices[i].1 { + panic!("Indices are not sorted"); + } + } + } + } + indices + } +} + +impl AsNDPointsAtIndex<3> for QueriableTimeTimsPeaks { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<3> { + NDPoint { + values: [ + self.peaks[index].mz as f32, + self.peaks[index].rt, + self.peaks[index].ims, + ], + } + } + + fn num_ndpoints(&self) -> usize { + self.peaks.len() 
+ } +} + +impl IntenseAtIndex for QueriableTimeTimsPeaks { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self.peaks[index].intensity + } + + fn intensity_index_length(&self) -> usize { + self.peaks.len() + } +} + +impl AsAggregableAtIndex for QueriableTimeTimsPeaks { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> TimeTimsPeak { + self.peaks[index] + } + + fn num_aggregable(&self) -> usize { + self.peaks.len() + } +} + +impl DistantAtIndex for QueriableTimeTimsPeaks { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + let a = self.peaks[index]; + let b = self.peaks[other]; + let mz = (a.mz - b.mz) as f32 / self.scalings.mz_scaling; + let rt = (a.rt - b.rt) as f32 / self.scalings.rt_scaling; + let ims = (a.ims - b.ims) as f32 / self.scalings.ims_scaling; + (mz * mz + rt * rt + ims * ims).sqrt() + } +} + +impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { + fn query_ndpoint( + &self, + point: &NDPoint<3>, + ) -> Vec { + let boundary = NDBoundary::new( + [ + (point.values[0] - self.scalings.mz_scaling) - f32::EPSILON, + (point.values[1] - self.scalings.rt_scaling), + (point.values[2] - self.scalings.ims_scaling) - f32::EPSILON, + ], + [ + (point.values[0] + self.scalings.mz_scaling) + f32::EPSILON, + (point.values[1] + self.scalings.rt_scaling), + (point.values[2] + self.scalings.ims_scaling) + f32::EPSILON, + ], + ); + let out = self.query_ndrange(&boundary, None); + out + } + + fn query_ndrange( + &self, + boundary: &NDBoundary<3>, + reference_point: Option<&NDPoint<3>>, + ) -> Vec { + let mut out = Vec::new(); + let mz_range = (boundary.starts[0], boundary.ends[0]); + let mz_range_f64 = (boundary.starts[0] as f64, boundary.ends[0] as f64); + let rt_range = (boundary.starts[1], boundary.ends[1]); + let ims_range = (boundary.starts[2], boundary.ends[2]); + + let (bstart, bend) = binary_search_slice( + &self.min_bucket_mz_vals, + |a, b| a.total_cmp(b), + mz_range.0, + mz_range.1, + ); + + let 
bstart = bstart.saturating_sub(1); + let bend_new = bend.saturating_add(1).min(self.min_bucket_mz_vals.len()); + + for bnum in bstart..bend_new { + let c_bucket = self.get_bucket_at(bnum); + if c_bucket.is_err() { + continue; + } + let c_bucket = c_bucket.unwrap(); + let page_start = bnum * self.bucket_size; + + let (istart, iend) = + binary_search_slice(c_bucket, |a, b| a.rt.total_cmp(&b), rt_range.0, rt_range.1); + + for (j, peak) in self.peaks[(istart + page_start)..(iend + page_start)] + .iter() + .enumerate() + { + debug_assert!( + peak.rt >= rt_range.0 && peak.rt <= rt_range.1, + "RT out of range -> {} {} {}; istart {}, page_starrt {}, j {}; window rts: {:?}", + peak.rt, + rt_range.0, + rt_range.1, + istart, + page_start, + j, + &self.peaks[(j + istart + page_start).saturating_sub(5) + ..(j + istart + page_start + 5).min(self.peaks.len())] + .iter() + .map(|x| x.rt) + .collect::>() + ); + if peak.ims >= ims_range.0 && peak.ims <= ims_range.1 { + if peak.mz as f32 >= mz_range.0 && peak.mz as f32 <= mz_range.1 { + out.push(j + istart + page_start); + } + } + } + } + + out + } +} + +// QueriableIndexedPoints + +fn combine_single_window_traces2( + prefiltered_peaks: Vec, + mz_scaling: f64, + max_mz_expansion_ratio: f32, + rt_scaling: f64, + max_rt_expansion_ratio: f32, + ims_scaling: f64, + max_ims_expansion_ratio: f32, + min_n: usize, + min_intensity: u32, + rt_binsize: f32, +) -> Vec { + let timer = utils::ContextTimer::new("dbscan_wt2", true, utils::LogLevel::DEBUG); + info!("Peaks in window: {}", prefiltered_peaks.len()); + let scalings = TimeTimsPeakScaling { + mz_scaling: mz_scaling as f32, + rt_scaling: rt_scaling as f32, + ims_scaling: ims_scaling as f32, + quad_scaling: 1., + }; + let window_quad_low_high = ( + prefiltered_peaks[0].quad_low_high.0, + prefiltered_peaks[0].quad_low_high.1, + ); + let index = QueriableTimeTimsPeaks::new(prefiltered_peaks, scalings); + let intensity_sorted_indices = index.get_intensity_sorted_indices(); + let 
max_extension_distances: [f32; 3] = [ + max_mz_expansion_ratio * mz_scaling as f32, + max_rt_expansion_ratio * rt_scaling as f32, + max_ims_expansion_ratio * ims_scaling as f32, + ]; + + let mut i_timer = timer.start_sub_timer("dbscan"); + let cluster_labels = dbscan_label_clusters( + &index, + &index, + &index, + min_n, + min_intensity.into(), + intensity_sorted_indices, + None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, + true, + &max_extension_distances, + ); + + i_timer.stop(true); + + let centroids = aggregate_clusters( + cluster_labels.num_clusters, + cluster_labels.cluster_labels, + &index, + &|| TraceAggregator { + mz: RollingSDCalculator::default(), + intensity: 0, + rt: RollingSDCalculator::default(), + ims: RollingSDCalculator::default(), + num_peaks: 0, + num_rt_peaks: 0, + quad_low_high: window_quad_low_high, + btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), + }, + utils::LogLevel::TRACE, + false, + ); + + debug!("Combined traces: {}", centroids.len()); + centroids +} + // TODO maybe this can be a builder-> executor pattern fn combine_single_window_traces( prefiltered_peaks: Vec, diff --git a/src/main.rs b/src/main.rs index 8ae427a..105cb54 100644 --- a/src/main.rs +++ b/src/main.rs @@ -146,7 +146,7 @@ fn main() { } println!("traces: {:?}", traces.len()); - traces.retain(|x| x.num_agg > 5); + traces.retain(|x| x.num_agg > 3); println!("traces: {:?}", traces.len()); if traces.len() > 5 { println!("sample_trace: {:?}", traces[traces.len() - 4]) diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 0dd98df..d9e1d89 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -719,7 +719,7 @@ impl<'a> AsNDPointsAtIndex<2> for FrameSlice<'a> { } } -impl QueriableIndexedPoints<'_, 2> for ExpandedFrameSlice { +impl QueriableIndexedPoints<2> for ExpandedFrameSlice { fn query_ndpoint( &self, point: &NDPoint<2>, @@ -760,9 +760,9 @@ impl QueriableIndexedPoints<'_, 2> for ExpandedFrameSlice { } } -impl<'a> 
QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { +impl<'a> QueriableIndexedPoints<2> for FrameSlice<'a> { fn query_ndpoint( - &'a self, + &self, point: &NDPoint<2>, ) -> Vec { let tof_index = point.values[0] as i32; @@ -788,7 +788,7 @@ impl<'a> QueriableIndexedPoints<'a, 2> for FrameSlice<'a> { } fn query_ndrange( - &'a self, + &self, boundary: &NDBoundary<2>, reference_point: Option<&NDPoint<2>>, ) -> Vec { diff --git a/src/ms/frames/frame_slice_rt_window.rs b/src/ms/frames/frame_slice_rt_window.rs index 73cffb0..69565cf 100644 --- a/src/ms/frames/frame_slice_rt_window.rs +++ b/src/ms/frames/frame_slice_rt_window.rs @@ -128,9 +128,9 @@ impl IntenseAtIndex for FrameSliceWindow<'_> { } } -impl<'a> QueriableIndexedPoints<'a, 2> for FrameSliceWindow<'a> { +impl<'a> QueriableIndexedPoints<2> for FrameSliceWindow<'a> { fn query_ndpoint( - &'a self, + &self, point: &NDPoint<2>, ) -> Vec { let mut out = Vec::new(); @@ -147,7 +147,7 @@ impl<'a> QueriableIndexedPoints<'a, 2> for FrameSliceWindow<'a> { } fn query_ndrange( - &'a self, + &self, boundary: &crate::space::space_generics::NDBoundary<2>, reference_point: Option<&NDPoint<2>>, ) -> Vec { diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index 5df1beb..fa4f348 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -260,16 +260,16 @@ impl<'a, const D: usize, T> RadiusKDTree<'a, T, D> { } } -impl<'a, const D: usize> QueriableIndexedPoints<'a, D> for RadiusKDTree<'a, usize, D> { +impl<'a, const D: usize> QueriableIndexedPoints for RadiusKDTree<'a, usize, D> { fn query_ndpoint( - &'a self, + &self, point: &NDPoint, ) -> Vec { self.query(point).into_iter().map(|x| *x).collect() } fn query_ndrange( - &'a self, + &self, boundary: &NDBoundary, reference_point: Option<&NDPoint>, ) -> Vec { diff --git a/src/space/quad.rs b/src/space/quad.rs index dbb0710..6dc8851 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -255,9 +255,9 @@ impl<'a, T> RadiusQuadTree<'a, T> { // TODO: rename 
count_neigh_monotonocally_increasing // because it can do more than just count neighbors.... -impl<'a> QueriableIndexedPoints<'a, 2> for RadiusQuadTree<'a, usize> { +impl<'a> QueriableIndexedPoints<2> for RadiusQuadTree<'a, usize> { fn query_ndpoint( - &'a self, + &self, point: &NDPoint<2>, ) -> Vec { self.query(point) @@ -267,7 +267,7 @@ impl<'a> QueriableIndexedPoints<'a, 2> for RadiusQuadTree<'a, usize> { } fn query_ndrange( - &'a self, + &self, boundary: &NDBoundary<2>, reference_point: Option<&NDPoint<2>>, ) -> Vec { diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index d506d5b..cd806c0 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -104,13 +104,13 @@ pub struct NDPoint { pub values: [f32; DIMENSIONALITY], } -pub trait QueriableIndexedPoints<'a, const N: usize> { +pub trait QueriableIndexedPoints { fn query_ndpoint( - &'a self, + &self, point: &NDPoint, ) -> Vec; fn query_ndrange( - &'a self, + &self, boundary: &NDBoundary, reference_point: Option<&NDPoint>, ) -> Vec; diff --git a/src/utils.rs b/src/utils.rs index d25f6d5..8d8b705 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -2,6 +2,7 @@ use log::{debug, info, trace, warn}; use num::cast::AsPrimitive; use std::{ cmp::Ordering, + fmt::Debug, time::{Duration, Instant}, }; @@ -397,19 +398,41 @@ pub fn get_stats(data: &[f64]) -> Stats { } } -/// This has been shamelessly copied from sage. +/// This has been shamelessly copied and very minorly modified from sage. /// https://github.com/lazear/sage/blob/93a9a8a7c9f717238fc6c582c0dd501a56159be7/crates/sage/src/database.rs#L498 /// Althought it really feels like this should be in the standard lib. 
/// /// Usage: /// ```rust -/// let data = [1.0, 1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0, 3.5, 4.0]; +/// use ionmesh::utils::binary_search_slice; +/// let data: [f64; 11]= [1.0, 1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0, 3.5, 4.0]; /// let (left, right) = binary_search_slice(&data, |a: &f64, b| a.total_cmp(b), 1.5, 3.25); -/// assert!(data[left] <= 1.5); +/// assert!(data[left] == 1.5); /// assert!(data[right] > 3.25); /// assert_eq!( /// &data[left..right], -/// &[1.0, 1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0] +/// &[1.5, 1.5, 1.5, 1.5, 2.0, 2.5, 3.0, 3.0] +/// ); +/// let empty: [f64; 0] = []; +/// let (left, right) = binary_search_slice(&empty, |a: &f64, b| a.total_cmp(b), 1.5, 3.25); +/// assert_eq!(left, 0); +/// assert_eq!(right, 0); +/// let (left, right) = binary_search_slice(&data, |a: &f64, b| a.total_cmp(b), -100., -99.); +/// assert_eq!(left, 0); +/// assert_eq!(right, 0); +/// assert_eq!(&data[left..right], &empty); +/// let (left, right) = binary_search_slice(&data, |a: &f64, b| a.total_cmp(b), 100., 101.); +/// assert_eq!(left, data.len()); +/// assert_eq!(right, data.len()); +/// assert_eq!(&data[left..right], &empty); +/// let data: [f64; 7]= [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]; +/// let (left, right) = binary_search_slice(&data, |a: &f64, b| a.total_cmp(b), 1.5, 3.25); +/// assert!(data[left] == 1.5); +/// assert!(data[right] > 3.25); +/// assert!(data[right-1] < 3.25); +/// assert_eq!( +/// &data[left..right], +/// &[1.5, 2.0, 2.5, 3.0] /// ); /// ``` /// @@ -422,13 +445,23 @@ pub fn binary_search_slice( ) -> (usize, usize) where F: Fn(&T, &S) -> Ordering, + T: Debug, { let left_idx = match slice.binary_search_by(|a| key(a, &low)) { - Ok(idx) | Err(idx) => { - let mut idx = idx.saturating_sub(1); - while idx > 0 && key(&slice[idx], &low) != Ordering::Less { + Ok(mut idx) | Err(mut idx) => { + if idx == slice.len() { + // This is very non-elegant ... pretty sure I need to split + // the ok-err cases to make a more elegant solution. 
+ return (idx, idx); + } + let mut any_nonless = false; + while idx != 0 && key(&slice[idx], &low) != Ordering::Less { + any_nonless = true; idx -= 1; } + if any_nonless { + idx = idx.saturating_add(1); + } idx }, }; @@ -442,6 +475,10 @@ where idx.min(slice.len()) }, }; + if cfg!(debug_assertions) { + // This makes sure the slice is indexable by the indices. + let _foo = &slice[left_idx..right_idx]; + }; (left_idx, right_idx) } From f1d930818f40b01cae3f1bbfe83ecabb95b7bc0d Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Wed, 24 Jul 2024 20:29:15 -0700 Subject: [PATCH 21/26] (feature) new indexing on traces and expanded how re-assignment of centroiding works --- src/aggregation/dbscan/dbscan.rs | 63 +- src/aggregation/dbscan/runner.rs | 5 +- src/aggregation/mod.rs | 2 + src/aggregation/pseudospectra.rs | 329 ++++++++ src/aggregation/queriable_collections/mod.rs | 4 + .../queriable_indexed_points.rs | 249 ++++++ .../queriable_collections/queriable_traces.rs | 258 ++++++ src/aggregation/tracing.rs | 733 +----------------- src/main.rs | 53 +- src/ms/frames/frame_slice.rs | 5 +- src/scoring.rs | 4 +- src/space/space_generics.rs | 69 +- 12 files changed, 989 insertions(+), 785 deletions(-) create mode 100644 src/aggregation/pseudospectra.rs create mode 100644 src/aggregation/queriable_collections/mod.rs create mode 100644 src/aggregation/queriable_collections/queriable_indexed_points.rs create mode 100644 src/aggregation/queriable_collections/queriable_traces.rs diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index ac4408d..954a5bf 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,12 +1,13 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; use crate::space::kdtree::RadiusKDTree; use crate::space::space_generics::{ - convert_to_bounds_query, AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, - IntenseAtIndex, NDPoint, 
NDPointConverter, QueriableIndexedPoints, + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPoint, + NDPointConverter, QueriableIndexedPoints, }; use crate::utils::{self, ContextTimer}; use log::{debug, info, trace}; use rayon::prelude::*; +use std::fmt::Debug; use std::ops::{Add, Index}; use crate::aggregation::dbscan::runner::dbscan_label_clusters; @@ -15,14 +16,14 @@ use crate::aggregation::dbscan::runner::dbscan_label_clusters; // and generates a new centroid that aggregates all the points in its range. // In contrast with the dbscan method, the elements in each cluster are not necessarily // mutually exclusive. -fn reassign_centroid< +pub fn reassign_centroid< 'a, const N: usize, - T: Send + Clone + Copy, + T: Send + Clone + Copy + Debug, C: NDPointConverter, I: QueriableIndexedPoints + std::marker::Sync, G: Sync + Send + ClusterAggregator, - R: Send, + R: Send + Debug, RE: Send + Sync + AsAggregableAtIndex + ?Sized, F: Fn() -> G + Send + Sync, >( @@ -33,26 +34,51 @@ fn reassign_centroid< def_aggregator: F, log_level: utils::LogLevel, expansion_factors: &[f32; N], + max_n: Option, + sort_lambda: Option<&dyn Fn(&R, &T, &T) -> std::cmp::Ordering>, ) -> Vec { let mut timer = utils::ContextTimer::new("reassign_centroid", true, log_level); + info!("Reassigning centroids params: {:?}", expansion_factors); + info!("Reassign centroids: {}", centroids.len()); let mut out = Vec::with_capacity(centroids.len()); for centroid in centroids { let query_point = centroid_converter.convert(¢roid); - let mut query_elems = convert_to_bounds_query(&query_point); - query_elems.0.expand(expansion_factors); + let mut query_elems = indexed_points.convert_to_bounds_query(&query_point); + query_elems.0.expand_absolute(expansion_factors); + + let mut neighbors: Vec = indexed_points + .query_ndrange(&query_elems.0, query_elems.1) + .into_iter() + .map(|x| elements.get_aggregable_at_index(x)) + .collect(); + + // Optionally sort by a passed 
function/lambda + if let Some(sort_lambda) = sort_lambda { + neighbors.sort_by(|a, b| sort_lambda(¢roid, a, b)); + } + + // Optionally truncate the neighbors + if let Some(max_n) = max_n { + neighbors.truncate(max_n); + } + + // 1/1000 show the first and last neighbor, as well as the centroid + if neighbors.len() > 0 { + if rand::random::() < 0.001 { + println!( + "Centroid: {:?}, First: {:?}, Last: {:?}", + centroid, + neighbors[0], + neighbors[neighbors.len() - 1] + ); + } + } - // trace!("Querying for Centroid: {:?}", query_elems.1); - // trace!("Querying for Boundary: {:?}", query_elems.0); - let neighbors = indexed_points.query_ndrange(&query_elems.0, query_elems.1); - // trace!("Found {} neighbors", neighbors.len()); let mut aggregator = def_aggregator(); - let mut num_agg = 0; for neighbor in neighbors { - aggregator.add(&elements.get_aggregable_at_index(neighbor)); - num_agg += 1; + aggregator.add(&neighbor); } - trace!("Aggregated {} elements", num_agg); out.push(aggregator.aggregate()); } @@ -61,7 +87,6 @@ fn reassign_centroid< } // TODO: rename prefiltered peaks argument! -// TODO implement a version that takes a sparse distance matrix. 
impl AsNDPointsAtIndex for Vec> { fn get_ndpoint( @@ -79,9 +104,9 @@ impl AsNDPointsAtIndex for Vec> { pub fn dbscan_generic< C: NDPointConverter, C2: NDPointConverter, - R: Send, + R: Send + Debug, G: Sync + Send + ClusterAggregator, - T: HasIntensity + Send + Clone + Copy + Sync, + T: HasIntensity + Send + Clone + Copy + Sync + Debug, RE: IntenseAtIndex + DistantAtIndex + IntoIterator @@ -152,6 +177,8 @@ where &def_aggregator, log_level, max_extension_distances, + None, + None, ), None => centroids, } diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index f161d91..74245fb 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,8 +1,7 @@ +use crate::space::space_generics::AsAggregableAtIndex; use crate::space::space_generics::{ - convert_to_bounds_query, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, - NDBoundary, NDPoint, QueriableIndexedPoints, + AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, }; -use crate::space::space_generics::{AsAggregableAtIndex, NDPointConverter}; use crate::utils; use core::fmt::Debug; use indicatif::ProgressIterator; diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 43ed723..77473ba 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -3,4 +3,6 @@ pub mod chromatograms; pub mod converters; pub mod dbscan; pub mod ms_denoise; +pub mod pseudospectra; +pub mod queriable_collections; pub mod tracing; diff --git a/src/aggregation/pseudospectra.rs b/src/aggregation/pseudospectra.rs new file mode 100644 index 0000000..838a0ab --- /dev/null +++ b/src/aggregation/pseudospectra.rs @@ -0,0 +1,329 @@ +use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; +use crate::aggregation::chromatograms::{ + BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, +}; +use crate::aggregation::dbscan::dbscan::{dbscan_aggregate, dbscan_generic, reassign_centroid}; 
+use crate::aggregation::dbscan::runner::dbscan_label_clusters; +use crate::aggregation::queriable_collections::queriable_indexed_points::{ + QueriableTimeTimsPeaks, TimeTimsPeakScaling, +}; +use crate::aggregation::queriable_collections::queriable_traces::{ + BaseTraceDistance, TraceScalings, +}; +use crate::aggregation::queriable_collections::QueriableTraces; +use crate::ms::frames::DenseFrameWindow; +use crate::space::space_generics::{ + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, NDPoint, + NDPointConverter, QueriableIndexedPoints, TraceLike, +}; +use crate::space::space_generics::{IntenseAtIndex, NDBoundary}; +use crate::utils; +use crate::utils::{binary_search_slice, RollingSDCalculator}; + +use core::panic; +use log::{debug, error, info, warn}; +use rayon::iter::IntoParallelIterator; +use rayon::prelude::*; +use serde::ser::{SerializeStruct, Serializer}; +use serde::{Deserialize, Serialize}; +use std::error::Error; +use std::io::Write; +use std::path::Path; + +use super::tracing::BaseTrace; + +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +pub struct PseudoscanGenerationConfig { + pub rt_scaling: f32, + pub quad_scaling: f32, + pub ims_scaling: f32, + pub max_rt_expansion_ratio: f32, + pub max_quad_expansion_ratio: f32, + pub max_ims_expansion_ratio: f32, + pub min_n: u8, + pub min_neighbor_intensity: u32, +} + +impl Default for PseudoscanGenerationConfig { + fn default() -> Self { + PseudoscanGenerationConfig { + rt_scaling: 2.4, + quad_scaling: 5., + ims_scaling: 0.015, + max_rt_expansion_ratio: 5., + max_quad_expansion_ratio: 1., + max_ims_expansion_ratio: 2., + min_n: 6, + min_neighbor_intensity: 6000, + } + } +} + +impl NDPointConverter for PseudoscanGenerationConfig { + fn convert( + &self, + elem: &PseudoSpectrum, + ) -> NDPoint<2> { + // let quad_mid = (elem.quad_low + elem.quad_high) / 2.; + NDPoint { + values: [elem.rt as f32, elem.ims as f32], + } + } +} + +pub fn combine_pseudospectra( + traces: Vec>, + config: 
PseudoscanGenerationConfig, +) -> Vec { + traces + .into_iter() + .map(|x| combine_single_pseudospectra_window(x, config.clone())) + .flatten() + .collect() +} + +pub fn combine_single_pseudospectra_window( + traces: Vec, + config: PseudoscanGenerationConfig, +) -> Vec { + let mut timer = + utils::ContextTimer::new("Combining pseudospectra??", true, utils::LogLevel::INFO); + + // let converter = BaseTraceConverter { + // rt_scaling: config.rt_scaling.into(), + // ims_scaling: config.ims_scaling.into(), + // quad_scaling: config.quad_scaling.into(), + // // rt_start_end_ratio: 2., + // // peak_width_prior: 0.75, + // }; + + const IOU_THRESH: f32 = 0.3; + const COSINE_THRESH: f32 = 0.8; + let extra_filter_fun = |x: &BaseTraceDistance| { + let close_in_quad = (x.quad_diff).abs() < 5.0; + let within_iou_tolerance = x.iou > IOU_THRESH; + let within_cosine_tolerance = x.cosine > COSINE_THRESH; + + return close_in_quad && within_iou_tolerance && within_cosine_tolerance; + }; + + let max_extension_distances: [f32; 2] = [ + config.max_rt_expansion_ratio * config.rt_scaling, + config.max_ims_expansion_ratio * config.ims_scaling, + // config.max_quad_expansion_ratio, + ]; + + let scalings = TraceScalings { + rt_scaling: config.rt_scaling.into(), + ims_scaling: config.ims_scaling.into(), + quad_scaling: config.quad_scaling.into(), + }; + + let qtt = QueriableTraces::new(traces, scalings); + let agg_timer = timer.start_sub_timer("aggregation"); + let mut agg1 = dbscan_aggregate( + &qtt, + &qtt, + &qtt, + agg_timer, + config.min_n.into(), + config.min_neighbor_intensity.into(), + PseudoSpectrumAggregator::default, + Some(&extra_filter_fun), + utils::LogLevel::INFO, + false, + &max_extension_distances, + true, + ); + agg1.retain(|x| x.peaks.len() > 3); + + let reassign_max_distances = [config.rt_scaling, config.ims_scaling]; + + let ranking_lambda = |p: &PseudoSpectrum, a: &BaseTrace, b: &BaseTrace| { + let rt_diff = (p.rt - a.rt).abs() / config.rt_scaling; + let ims_diff = 
(p.ims - a.mobility).abs() / config.ims_scaling; + let quad_diff = (p.quad_low - a.quad_low).abs(); + let rt_diff_b = (p.rt - b.rt).abs() / config.rt_scaling; + let ims_diff_b = (p.ims - b.mobility).abs() / config.ims_scaling; + let quad_diff_b = (p.quad_low - b.quad_low).abs(); + let diff = rt_diff + ims_diff + quad_diff; + let diff_b = rt_diff_b + ims_diff_b + quad_diff_b; + let out = diff.total_cmp(&diff_b); + out + }; + let agg2 = reassign_centroid( + agg1, + &qtt, + config.clone(), + &qtt, + &PseudoSpectrumAggregator::default, + utils::LogLevel::INFO, + &reassign_max_distances, + Some(300), + Some(&ranking_lambda), + ); + + info!("Combined pseudospectra: {}", agg2.len()); + timer.stop(true); + agg2 +} + +pub fn write_pseudoscans_json( + pseudocscans: &[PseudoSpectrum], + out_path: impl AsRef, +) -> Result<(), Box> { + info!( + "Writting pseudoscans to json: {}", + out_path.as_ref().display() + ); + let mut file = std::fs::File::create(out_path)?; + file.write("[".as_bytes())?; + let mut is_first = true; + for x in pseudocscans { + let json = serde_json::to_string(&x)?; + if is_first { + is_first = false; + } else { + file.write(",\n".as_bytes())?; + } + file.write(json.as_bytes())?; + } + file.write("]".as_bytes())?; + + Ok(()) +} + +/// Peaks are mz-intensity pairs +type Peak = (f64, u64); + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PseudoSpectrum { + pub peaks: Vec, + pub rt: f32, + pub rt_min: f32, + pub rt_max: f32, + pub rt_std: f32, + pub rt_skew: f32, + pub ims: f32, + pub ims_std: f32, + pub ims_skew: f32, + pub quad_low: f32, + pub quad_high: f32, +} + +#[derive(Debug)] +pub struct PseudoSpectrumAggregator { + peaks: Vec, + intensity: u64, + rt: RollingSDCalculator, + ims: RollingSDCalculator, + quad_low: RollingSDCalculator, + quad_high: RollingSDCalculator, +} + +impl Default for PseudoSpectrumAggregator { + fn default() -> Self { + let nv = Vec::new(); + PseudoSpectrumAggregator { + peaks: nv, + intensity: 0, + rt: 
RollingSDCalculator::default(), + ims: RollingSDCalculator::default(), + // I am adding here because in the future I want to support + // the weird pasef modes. + quad_low: RollingSDCalculator::default(), + quad_high: RollingSDCalculator::default(), + } + } +} + +impl<'a> ClusterAggregator for PseudoSpectrumAggregator { + fn add( + &mut self, + peak: &BaseTrace, + ) { + debug_assert!(peak.intensity < u64::MAX - self.intensity); + + self.rt.add(peak.rt as f64, peak.intensity); + self.ims.add(peak.mobility as f64, peak.intensity); + self.quad_low.add(peak.quad_low, peak.intensity); + self.quad_high.add(peak.quad_high, peak.intensity); + self.peaks.push((peak.mz, peak.intensity)); + } + + fn aggregate(&self) -> PseudoSpectrum { + // TECHNICALLY this can error out if there are no elements... + let rt = self.rt.get_mean() as f32; + let ims = self.ims.get_mean() as f32; + let rt_skew = self.rt.get_skew() as f32; + let ims_skew = self.ims.get_skew() as f32; + let rt_std = self.rt.get_sd() as f32; + let ims_std = self.ims.get_sd() as f32; + let quad_low_high = (self.quad_low.get_mean(), self.quad_high.get_mean()); + + PseudoSpectrum { + peaks: self.peaks.clone(), + rt, + ims, + rt_min: self.rt.get_min().unwrap() as f32, + rt_max: self.rt.get_max().unwrap() as f32, + rt_std, + ims_std, + rt_skew, + ims_skew, + quad_low: quad_low_high.0, + quad_high: quad_low_high.1, + } + } + + fn combine( + self, + other: Self, + ) -> Self { + let mut peaks = self.peaks.clone(); + peaks.extend(other.peaks.clone()); + let mut rt = self.rt; + let mut ims = self.ims; + let mut quad_low = self.quad_low; + let mut quad_high = self.quad_high; + + rt.merge(&other.rt); + ims.merge(&other.ims); + quad_low.merge(&other.quad_low); + quad_high.merge(&other.quad_high); + + PseudoSpectrumAggregator { + peaks, + intensity: self.intensity + other.intensity, + rt, + ims, + quad_low, + quad_high, + } + } +} + +struct BaseTraceConverter { + rt_scaling: f64, + ims_scaling: f64, + quad_scaling: f64, +} + +impl 
NDPointConverter for BaseTraceConverter { + fn convert( + &self, + elem: &BaseTrace, + ) -> NDPoint<3> { + // let rt_start_use = (elem.rt - elem.rt_std).min(elem.rt - self.peak_width_prior as f32); + // let rt_end_use = (elem.rt + elem.rt_std).max(elem.rt + self.peak_width_prior as f32); + // let rt_start_end_scaling = self.rt_scaling * self.rt_start_end_ratio; + let quad_center = (elem.quad_low + elem.quad_high) / 2.; + NDPoint { + values: [ + (elem.rt as f64 / self.rt_scaling) as f32, + (elem.mobility as f64 / self.ims_scaling) as f32, + (quad_center as f64 / self.quad_scaling) as f32, + ], + } + } +} diff --git a/src/aggregation/queriable_collections/mod.rs b/src/aggregation/queriable_collections/mod.rs new file mode 100644 index 0000000..880d64c --- /dev/null +++ b/src/aggregation/queriable_collections/mod.rs @@ -0,0 +1,4 @@ +pub mod queriable_indexed_points; +pub mod queriable_traces; +pub use queriable_indexed_points::QueriableIndexedPoints; +pub use queriable_traces::QueriableTraces; diff --git a/src/aggregation/queriable_collections/queriable_indexed_points.rs b/src/aggregation/queriable_collections/queriable_indexed_points.rs new file mode 100644 index 0000000..0adb510 --- /dev/null +++ b/src/aggregation/queriable_collections/queriable_indexed_points.rs @@ -0,0 +1,249 @@ +pub use crate::{ + aggregation::tracing::TimeTimsPeak, + space::space_generics::{ + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, + NDPoint, QueriableIndexedPoints, + }, + utils::binary_search_slice, +}; +use rayon::prelude::*; + +#[derive(Debug)] +pub struct TimeTimsPeakScaling { + pub mz_scaling: f32, + pub rt_scaling: f32, + pub ims_scaling: f32, + pub quad_scaling: f32, +} + +#[derive(Debug)] +pub struct QueriableTimeTimsPeaks { + peaks: Vec, + min_bucket_mz_vals: Vec, + bucket_size: usize, + scalings: TimeTimsPeakScaling, +} + +impl QueriableTimeTimsPeaks { + pub fn new( + mut peaks: Vec, + scalings: TimeTimsPeakScaling, + ) -> Self { + const 
BUCKET_SIZE: usize = 16384; + // // Sort all of our peaks by m/z, from low to high + peaks.par_sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); + + let mut min_bucket_mz_vals = peaks + .par_chunks_mut(BUCKET_SIZE) + .map(|bucket| { + let min = bucket[0].mz; + bucket.par_sort_unstable_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); + min as f32 + }) + .collect::>(); + + // Get the max value of the last bucket + let max_bucket_mz = peaks[peaks.len().saturating_sub(BUCKET_SIZE)..peaks.len()] + .iter() + .max_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()) + .unwrap() + .mz as f32; + min_bucket_mz_vals.push(max_bucket_mz); + + QueriableTimeTimsPeaks { + peaks, + min_bucket_mz_vals, + bucket_size: BUCKET_SIZE, + scalings, + } + } + + fn get_bucket_at( + &self, + index: usize, + ) -> Result<&[TimeTimsPeak], ()> { + let page_start = index * self.bucket_size; + if page_start >= self.peaks.len() { + return Err(()); + } + let page_end = (page_start + self.bucket_size).min(self.peaks.len()); + let tmp = &self.peaks[page_start..page_end]; + + if cfg!(debug_assertions) { + // Check every 100 random queries ... 
+ if rand::random::() % 100 == 0 { + let mut last_rt = 0.; + for i in 0..tmp.len() { + if tmp[i].rt < last_rt { + panic!("RTs are not sorted within the bucket"); + } + last_rt = tmp[i].rt; + } + } + } + Ok(tmp) + } + + pub fn get_intensity_sorted_indices(&self) -> Vec<(usize, u64)> { + let mut indices: Vec<(usize, u64)> = (0..self.peaks.len()) + .map(|i| (i, self.peaks[i].intensity)) + .collect(); + indices.par_sort_unstable_by_key(|&x| x.1); + + debug_assert!(indices.len() == self.peaks.len()); + if cfg!(debug_assertions) { + if indices.len() > 1 { + for i in 1..indices.len() { + if indices[i - 1].1 > indices[i].1 { + panic!("Indices are not sorted"); + } + } + } + } + indices + } +} + +impl AsNDPointsAtIndex<3> for QueriableTimeTimsPeaks { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<3> { + NDPoint { + values: [ + self.peaks[index].mz as f32, + self.peaks[index].rt, + self.peaks[index].ims, + ], + } + } + + fn num_ndpoints(&self) -> usize { + self.peaks.len() + } +} + +impl IntenseAtIndex for QueriableTimeTimsPeaks { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self.peaks[index].intensity + } + + fn intensity_index_length(&self) -> usize { + self.peaks.len() + } +} + +impl AsAggregableAtIndex for QueriableTimeTimsPeaks { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> TimeTimsPeak { + self.peaks[index] + } + + fn num_aggregable(&self) -> usize { + self.peaks.len() + } +} + +impl DistantAtIndex for QueriableTimeTimsPeaks { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> f32 { + let a = self.peaks[index]; + let b = self.peaks[other]; + let mz = (a.mz - b.mz) as f32 / self.scalings.mz_scaling; + let rt = (a.rt - b.rt) as f32 / self.scalings.rt_scaling; + let ims = (a.ims - b.ims) as f32 / self.scalings.ims_scaling; + (mz * mz + rt * rt + ims * ims).sqrt() + } +} + +impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { + fn query_ndpoint( + &self, + point: &NDPoint<3>, + ) -> Vec { + let 
boundary = NDBoundary::new( + [ + (point.values[0] - self.scalings.mz_scaling) - f32::EPSILON, + (point.values[1] - self.scalings.rt_scaling), + (point.values[2] - self.scalings.ims_scaling) - f32::EPSILON, + ], + [ + (point.values[0] + self.scalings.mz_scaling) + f32::EPSILON, + (point.values[1] + self.scalings.rt_scaling), + (point.values[2] + self.scalings.ims_scaling) + f32::EPSILON, + ], + ); + let out = self.query_ndrange(&boundary, None); + out + } + + fn query_ndrange( + &self, + boundary: &NDBoundary<3>, + reference_point: Option<&NDPoint<3>>, + ) -> Vec { + let mut out = Vec::new(); + let mz_range = (boundary.starts[0], boundary.ends[0]); + let mz_range_f64 = (boundary.starts[0] as f64, boundary.ends[0] as f64); + let rt_range = (boundary.starts[1], boundary.ends[1]); + let ims_range = (boundary.starts[2], boundary.ends[2]); + + let (bstart, bend) = binary_search_slice( + &self.min_bucket_mz_vals, + |a, b| a.total_cmp(b), + mz_range.0, + mz_range.1, + ); + + let bstart = bstart.saturating_sub(1); + let bend_new = bend.saturating_add(1).min(self.min_bucket_mz_vals.len()); + + for bnum in bstart..bend_new { + let c_bucket = self.get_bucket_at(bnum); + if c_bucket.is_err() { + continue; + } + let c_bucket = c_bucket.unwrap(); + let page_start = bnum * self.bucket_size; + + let (istart, iend) = + binary_search_slice(c_bucket, |a, b| a.rt.total_cmp(&b), rt_range.0, rt_range.1); + + for (j, peak) in self.peaks[(istart + page_start)..(iend + page_start)] + .iter() + .enumerate() + { + debug_assert!( + peak.rt >= rt_range.0 && peak.rt <= rt_range.1, + "RT out of range -> {} {} {}; istart {}, page_starrt {}, j {}; window rts: {:?}", + peak.rt, + rt_range.0, + rt_range.1, + istart, + page_start, + j, + &self.peaks[(j + istart + page_start).saturating_sub(5) + ..(j + istart + page_start + 5).min(self.peaks.len())] + .iter() + .map(|x| x.rt) + .collect::>() + ); + if peak.ims >= ims_range.0 && peak.ims <= ims_range.1 { + if peak.mz as f32 >= mz_range.0 && peak.mz as 
f32 <= mz_range.1 { + out.push(j + istart + page_start); + } + } + } + } + + out + } +} diff --git a/src/aggregation/queriable_collections/queriable_traces.rs b/src/aggregation/queriable_collections/queriable_traces.rs new file mode 100644 index 0000000..d1bfdbb --- /dev/null +++ b/src/aggregation/queriable_collections/queriable_traces.rs @@ -0,0 +1,258 @@ +use crate::aggregation::tracing::BaseTrace; +pub use crate::{ + aggregation::tracing::TimeTimsPeak, + space::space_generics::{ + AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, + NDPoint, QueriableIndexedPoints, + }, + utils::binary_search_slice, +}; +use log::{debug, info}; +use rayon::prelude::*; + +#[derive(Debug)] +pub struct TraceScalings { + pub rt_scaling: f64, + pub ims_scaling: f64, + pub quad_scaling: f64, +} + +#[derive(Debug)] +pub struct QueriableTraces { + traces: Vec, + min_bucket_rt_vals: Vec, + bucket_size: usize, + scalings: TraceScalings, +} + +impl QueriableTraces { + pub fn new( + mut traces: Vec, + scalings: TraceScalings, + ) -> Self { + const BUCKET_SIZE: usize = 16384 / 2; + // Sort all of our peaks by rt, from low to high + traces.par_sort_unstable_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); + + let mut min_bucket_rt_vals = traces + .par_chunks_mut(BUCKET_SIZE) + .map(|bucket| { + let min = bucket[0].rt; + bucket.par_sort_unstable_by(|a, b| a.mobility.partial_cmp(&b.mobility).unwrap()); + min + }) + .collect::>(); + + // Get the max value of the last bucket + let max_bucket_rt = traces[traces.len().saturating_sub(BUCKET_SIZE)..traces.len()] + .iter() + .max_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()) + .unwrap() + .rt; + min_bucket_rt_vals.push(max_bucket_rt); + + QueriableTraces { + traces, + min_bucket_rt_vals, + bucket_size: BUCKET_SIZE, + scalings, + } + } + + fn get_bucket_at( + &self, + index: usize, + ) -> Result<&[BaseTrace], ()> { + let page_start = index * self.bucket_size; + if page_start >= self.traces.len() { + return Err(()); + } + let 
page_end = (page_start + self.bucket_size).min(self.traces.len()); + let tmp = &self.traces[page_start..page_end]; + + if cfg!(debug_assertions) { + if rand::random::() % 100 == 0 { + // Make sure all rts are sorted within the bucket + for i in 1..tmp.len() { + if tmp[i - 1].mobility > tmp[i].mobility { + panic!("RTs are not sorted within the bucket"); + } + } + } + } + Ok(tmp) + } + + pub fn get_intensity_sorted_indices(&self) -> Vec<(usize, u64)> { + let mut indices: Vec<(usize, u64)> = (0..self.traces.len()) + .map(|i| (i, self.traces[i].intensity)) + .collect(); + + indices.par_sort_unstable_by_key(|&x| x.1); + indices + } +} + +impl AsNDPointsAtIndex<2> for QueriableTraces { + fn get_ndpoint( + &self, + index: usize, + ) -> NDPoint<2> { + let out = NDPoint { + values: [self.traces[index].rt, self.traces[index].mobility], + }; + out + } + fn num_ndpoints(&self) -> usize { + self.traces.len() + } +} + +impl IntenseAtIndex for QueriableTraces { + fn intensity_at_index( + &self, + index: usize, + ) -> u64 { + self.traces[index].intensity + } + + fn intensity_index_length(&self) -> usize { + self.traces.len() + } +} + +impl AsAggregableAtIndex for QueriableTraces { + fn get_aggregable_at_index( + &self, + index: usize, + ) -> BaseTrace { + self.traces[index] + } + + fn num_aggregable(&self) -> usize { + self.traces.len() + } +} + +pub struct BaseTraceDistance { + pub quad_diff: f32, + pub iou: f32, + pub cosine: f32, +} + +impl DistantAtIndex for QueriableTraces { + fn distance_at_indices( + &self, + index: usize, + other: usize, + ) -> BaseTraceDistance { + let quad_diff = (self.traces[index].quad_center - self.traces[other].quad_center).abs(); + let iou = self.traces[index].rt_iou(&self.traces[other]); + // Q: What can cause an error here?? 
+ let cosine = self.traces[index] + .chromatogram + .cosine_similarity(&self.traces[other].chromatogram) + .unwrap(); + BaseTraceDistance { + quad_diff, + iou, + cosine, + } + } +} + +impl QueriableIndexedPoints<2> for QueriableTraces { + fn query_ndpoint( + &self, + point: &NDPoint<2>, + ) -> Vec { + let (bounds, point) = self.convert_to_bounds_query(point); + self.query_ndrange(&bounds, point) + } + + fn convert_to_bounds_query<'a>( + &'a self, + point: &'a NDPoint<2>, + ) -> (NDBoundary<2>, Option<&NDPoint<2>>) { + let rt = point.values[0]; + let mobility = point.values[1]; + let bounds = NDBoundary::new( + [ + rt - self.scalings.rt_scaling as f32, + mobility - self.scalings.ims_scaling as f32, + ], + [ + rt + self.scalings.rt_scaling as f32, + mobility + self.scalings.ims_scaling as f32, + ], + ); + (bounds, Some(point)) + } + + fn query_ndrange( + &self, + boundary: &NDBoundary<2>, + reference_point: Option<&NDPoint<2>>, + ) -> Vec { + let start_rt = boundary.starts[0]; + let end_rt = boundary.ends[0]; + + let start_ims = boundary.starts[1]; + let end_ims = boundary.ends[1]; + + let mut out = Vec::new(); + let (start_bucket, end_bucket) = binary_search_slice( + &self.min_bucket_rt_vals, + |a, b| a.total_cmp(b), + start_rt, + end_rt, + ); + + let bstart = start_bucket.saturating_sub(1); + let bend_new = end_bucket + .saturating_add(1) + .min(self.min_bucket_rt_vals.len()); + + for bucket_index in bstart..bend_new { + let bucket = match self.get_bucket_at(bucket_index) { + Ok(x) => x, + Err(()) => continue, + }; + + let (ibstart, ibend) = binary_search_slice( + bucket, + |a, b| a.mobility.partial_cmp(&b).unwrap(), + start_ims, + end_ims, + ); + + for (ti, trace) in bucket[ibstart..ibend].iter().enumerate() { + if trace.rt < start_rt || trace.rt > end_rt { + continue; + } + if trace.mobility < start_ims || trace.mobility > end_ims { + continue; + } + if let Some(reference_point) = reference_point { + let dist = (reference_point.values[0] - trace.rt).abs() + + 
(reference_point.values[1] - trace.mobility).abs(); + if dist > self.scalings.rt_scaling as f32 + self.scalings.ims_scaling as f32 { + continue; + } + } + let pi = ti + ibstart + bucket_index * self.bucket_size; + debug_assert!(pi < self.traces.len()); + out.push(pi); + } + } + + if out.len() == 0 { + info!( + "No traces found for query: \n{:?} -> {:?}\n", + boundary, reference_point + ); + } + + out + } +} diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 1ab9d68..322564b 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -4,6 +4,10 @@ use crate::aggregation::chromatograms::{ }; use crate::aggregation::dbscan::dbscan::dbscan_generic; use crate::aggregation::dbscan::runner::dbscan_label_clusters; +use crate::aggregation::queriable_collections::queriable_indexed_points::{ + QueriableTimeTimsPeaks, TimeTimsPeakScaling, +}; +use crate::aggregation::queriable_collections::queriable_traces::BaseTraceDistance; use crate::ms::frames::DenseFrameWindow; use crate::space::space_generics::{ AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, NDPoint, @@ -214,7 +218,7 @@ pub fn combine_traces( grouped_denseframe_windows: Vec>, config: TracingConfig, rt_binsize: f32, -) -> Vec { +) -> Vec> { // mz_scaling: f64, // rt_scaling: f64, // ims_scaling: f64, @@ -245,18 +249,13 @@ pub fn combine_traces( }; // Combine the traces - let out: Vec = if cfg!(feature = "less_parallel") { + let out: Vec> = if cfg!(feature = "less_parallel") { warn!("Running in single-threaded mode"); - grouped_windows - .into_iter() - .map(combine_lambda) - .flatten() - .collect() + grouped_windows.into_iter().map(combine_lambda).collect() } else { grouped_windows .into_par_iter() .map(combine_lambda) - .flatten() .collect() }; @@ -363,40 +362,6 @@ impl ClusterAggregator for TraceAggregator { } } -#[derive(Debug, Default)] -struct TimeTimsPeakConverter { - // Takes DenseFrameWindow - mz_scaling: f64, - rt_scaling: f64, - ims_scaling: f64, -} - 
-impl NDPointConverter for TimeTimsPeakConverter { - fn convert( - &self, - elem: &TimeTimsPeak, - ) -> NDPoint<3> { - NDPoint { - values: [ - (elem.mz / self.mz_scaling) as f32, - (elem.rt as f64 / self.rt_scaling) as f32, - (elem.ims as f64 / self.ims_scaling) as f32, - ], - } - } -} - -struct BypassBaseTraceBackConverter {} - -impl NDPointConverter for BypassBaseTraceBackConverter { - fn convert( - &self, - _elem: &BaseTrace, - ) -> NDPoint<3> { - panic!("This should never be called"); - } -} - fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec { denseframe_windows .into_iter() @@ -417,283 +382,6 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec>() } -impl IntenseAtIndex for Vec { - fn intensity_at_index( - &self, - index: usize, - ) -> u64 { - self[index].intensity - } - - fn intensity_index_length(&self) -> usize { - self.len() - } -} - -impl AsAggregableAtIndex for Vec { - fn get_aggregable_at_index( - &self, - index: usize, - ) -> TimeTimsPeak { - self[index] - } - - fn num_aggregable(&self) -> usize { - self.len() - } -} - -impl DistantAtIndex for Vec { - fn distance_at_indices( - &self, - index: usize, - other: usize, - ) -> f32 { - panic!("I dont think this is called ever ..."); - } -} - -// Needed to specify the generic in dbscan_generic -type FFTimeTimsPeak = fn(&TimeTimsPeak, &TimeTimsPeak) -> bool; - -#[derive(Debug)] -struct TimeTimsPeakScaling { - mz_scaling: f32, - rt_scaling: f32, - ims_scaling: f32, - quad_scaling: f32, -} - -#[derive(Debug)] -struct QueriableTimeTimsPeaks { - peaks: Vec, - min_bucket_mz_vals: Vec, - bucket_size: usize, - scalings: TimeTimsPeakScaling, -} - -impl QueriableTimeTimsPeaks { - fn new( - mut peaks: Vec, - scalings: TimeTimsPeakScaling, - ) -> Self { - const BUCKET_SIZE: usize = 16384; - // // Sort all of our theoretical fragments by m/z, from low to high - peaks.par_sort_unstable_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()); - - let mut min_bucket_mz_vals = peaks - .par_chunks_mut(BUCKET_SIZE) - 
.map(|bucket| { - let min = bucket[0].mz; - bucket.par_sort_unstable_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); - min as f32 - }) - .collect::>(); - - // Get the max value of the last bucket - let max_bucket_mz = peaks[peaks.len().saturating_sub(BUCKET_SIZE)..peaks.len()] - .iter() - .max_by(|a, b| a.mz.partial_cmp(&b.mz).unwrap()) - .unwrap() - .mz as f32; - min_bucket_mz_vals.push(max_bucket_mz); - - QueriableTimeTimsPeaks { - peaks, - min_bucket_mz_vals, - bucket_size: BUCKET_SIZE, - scalings, - } - } - - fn get_bucket_at( - &self, - index: usize, - ) -> Result<&[TimeTimsPeak], ()> { - let page_start = index * self.bucket_size; - if page_start >= self.peaks.len() { - return Err(()); - } - let page_end = (page_start + self.bucket_size).min(self.peaks.len()); - let tmp = &self.peaks[page_start..page_end]; - - if cfg!(debug_assertions) { - // Make sure all rts are sorted within the bucket - for i in 1..tmp.len() { - if tmp[i - 1].rt > tmp[i].rt { - panic!("RTs are not sorted within the bucket"); - } - } - } - Ok(tmp) - } - - fn get_intensity_sorted_indices(&self) -> Vec<(usize, u64)> { - let mut indices: Vec<(usize, u64)> = (0..self.peaks.len()) - .map(|i| (i, self.peaks[i].intensity)) - .collect(); - indices.par_sort_unstable_by_key(|&x| x.1); - - debug_assert!(indices.len() == self.peaks.len()); - if cfg!(debug_assertions) { - if indices.len() > 1 { - for i in 1..indices.len() { - if indices[i - 1].1 > indices[i].1 { - panic!("Indices are not sorted"); - } - } - } - } - indices - } -} - -impl AsNDPointsAtIndex<3> for QueriableTimeTimsPeaks { - fn get_ndpoint( - &self, - index: usize, - ) -> NDPoint<3> { - NDPoint { - values: [ - self.peaks[index].mz as f32, - self.peaks[index].rt, - self.peaks[index].ims, - ], - } - } - - fn num_ndpoints(&self) -> usize { - self.peaks.len() - } -} - -impl IntenseAtIndex for QueriableTimeTimsPeaks { - fn intensity_at_index( - &self, - index: usize, - ) -> u64 { - self.peaks[index].intensity - } - - fn 
intensity_index_length(&self) -> usize { - self.peaks.len() - } -} - -impl AsAggregableAtIndex for QueriableTimeTimsPeaks { - fn get_aggregable_at_index( - &self, - index: usize, - ) -> TimeTimsPeak { - self.peaks[index] - } - - fn num_aggregable(&self) -> usize { - self.peaks.len() - } -} - -impl DistantAtIndex for QueriableTimeTimsPeaks { - fn distance_at_indices( - &self, - index: usize, - other: usize, - ) -> f32 { - let a = self.peaks[index]; - let b = self.peaks[other]; - let mz = (a.mz - b.mz) as f32 / self.scalings.mz_scaling; - let rt = (a.rt - b.rt) as f32 / self.scalings.rt_scaling; - let ims = (a.ims - b.ims) as f32 / self.scalings.ims_scaling; - (mz * mz + rt * rt + ims * ims).sqrt() - } -} - -impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { - fn query_ndpoint( - &self, - point: &NDPoint<3>, - ) -> Vec { - let boundary = NDBoundary::new( - [ - (point.values[0] - self.scalings.mz_scaling) - f32::EPSILON, - (point.values[1] - self.scalings.rt_scaling), - (point.values[2] - self.scalings.ims_scaling) - f32::EPSILON, - ], - [ - (point.values[0] + self.scalings.mz_scaling) + f32::EPSILON, - (point.values[1] + self.scalings.rt_scaling), - (point.values[2] + self.scalings.ims_scaling) + f32::EPSILON, - ], - ); - let out = self.query_ndrange(&boundary, None); - out - } - - fn query_ndrange( - &self, - boundary: &NDBoundary<3>, - reference_point: Option<&NDPoint<3>>, - ) -> Vec { - let mut out = Vec::new(); - let mz_range = (boundary.starts[0], boundary.ends[0]); - let mz_range_f64 = (boundary.starts[0] as f64, boundary.ends[0] as f64); - let rt_range = (boundary.starts[1], boundary.ends[1]); - let ims_range = (boundary.starts[2], boundary.ends[2]); - - let (bstart, bend) = binary_search_slice( - &self.min_bucket_mz_vals, - |a, b| a.total_cmp(b), - mz_range.0, - mz_range.1, - ); - - let bstart = bstart.saturating_sub(1); - let bend_new = bend.saturating_add(1).min(self.min_bucket_mz_vals.len()); - - for bnum in bstart..bend_new { - let c_bucket = 
self.get_bucket_at(bnum); - if c_bucket.is_err() { - continue; - } - let c_bucket = c_bucket.unwrap(); - let page_start = bnum * self.bucket_size; - - let (istart, iend) = - binary_search_slice(c_bucket, |a, b| a.rt.total_cmp(&b), rt_range.0, rt_range.1); - - for (j, peak) in self.peaks[(istart + page_start)..(iend + page_start)] - .iter() - .enumerate() - { - debug_assert!( - peak.rt >= rt_range.0 && peak.rt <= rt_range.1, - "RT out of range -> {} {} {}; istart {}, page_starrt {}, j {}; window rts: {:?}", - peak.rt, - rt_range.0, - rt_range.1, - istart, - page_start, - j, - &self.peaks[(j + istart + page_start).saturating_sub(5) - ..(j + istart + page_start + 5).min(self.peaks.len())] - .iter() - .map(|x| x.rt) - .collect::>() - ); - if peak.ims >= ims_range.0 && peak.ims <= ims_range.1 { - if peak.mz as f32 >= mz_range.0 && peak.mz as f32 <= mz_range.1 { - out.push(j + istart + page_start); - } - } - } - } - - out - } -} - -// QueriableIndexedPoints - fn combine_single_window_traces2( prefiltered_peaks: Vec, mz_scaling: f64, @@ -741,20 +429,22 @@ fn combine_single_window_traces2( i_timer.stop(true); + let def_aggregator = || TraceAggregator { + mz: RollingSDCalculator::default(), + intensity: 0, + rt: RollingSDCalculator::default(), + ims: RollingSDCalculator::default(), + num_peaks: 0, + num_rt_peaks: 0, + quad_low_high: window_quad_low_high, + btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), + }; + let centroids = aggregate_clusters( cluster_labels.num_clusters, cluster_labels.cluster_labels, &index, - &|| TraceAggregator { - mz: RollingSDCalculator::default(), - intensity: 0, - rt: RollingSDCalculator::default(), - ims: RollingSDCalculator::default(), - num_peaks: 0, - num_rt_peaks: 0, - quad_low_high: window_quad_low_high, - btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), - }, + &def_aggregator, utils::LogLevel::TRACE, false, ); @@ -762,388 +452,3 @@ fn combine_single_window_traces2( debug!("Combined traces: {}", centroids.len()); 
centroids } - -// TODO maybe this can be a builder-> executor pattern -fn combine_single_window_traces( - prefiltered_peaks: Vec, - mz_scaling: f64, - max_mz_expansion_ratio: f32, - rt_scaling: f64, - max_rt_expansion_ratio: f32, - ims_scaling: f64, - max_ims_expansion_ratio: f32, - min_n: usize, - min_intensity: u32, - rt_binsize: f32, -) -> Vec { - info!("Peaks in window: {}", prefiltered_peaks.len()); - let converter: TimeTimsPeakConverter = TimeTimsPeakConverter { - mz_scaling, - rt_scaling, - ims_scaling, - }; - let window_quad_low_high = ( - prefiltered_peaks[0].quad_low_high.0, - prefiltered_peaks[0].quad_low_high.1, - ); - let max_extension_distances: [f32; 3] = [ - max_mz_expansion_ratio, - max_rt_expansion_ratio, - max_ims_expansion_ratio, - ]; - warn!("Assuming all quad windows are the same!!! (fine for diaPASEF)"); - - // TODO make dbscan_generic a runner-class - let out_traces: Vec = dbscan_generic( - converter, - &prefiltered_peaks, - min_n, - min_intensity.into(), - || TraceAggregator { - mz: RollingSDCalculator::default(), - intensity: 0, - rt: RollingSDCalculator::default(), - ims: RollingSDCalculator::default(), - num_peaks: 0, - num_rt_peaks: 0, - quad_low_high: window_quad_low_high, - btree_chromatogram: BTreeChromatogram::new_lazy(rt_binsize), - }, - None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, - None, - false, - &max_extension_distances, - None::, - ); - - debug!("Combined traces: {}", out_traces.len()); - out_traces -} - -// NOW ... 
combine traces into pseudospectra - -/// Peaks are mz-intensity pairs -type Peak = (f64, u64); - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PseudoSpectrum { - pub peaks: Vec, - pub rt: f32, - pub rt_min: f32, - pub rt_max: f32, - pub rt_std: f32, - pub rt_skew: f32, - pub ims: f32, - pub ims_std: f32, - pub ims_skew: f32, - pub quad_low: f32, - pub quad_high: f32, -} - -#[derive(Debug)] -pub struct PseudoSpectrumAggregator { - peaks: Vec, - intensity: u64, - rt: RollingSDCalculator, - ims: RollingSDCalculator, - quad_low: RollingSDCalculator, - quad_high: RollingSDCalculator, -} - -impl Default for PseudoSpectrumAggregator { - fn default() -> Self { - let nv = Vec::new(); - PseudoSpectrumAggregator { - peaks: nv, - intensity: 0, - rt: RollingSDCalculator::default(), - ims: RollingSDCalculator::default(), - // I am adding here because in the future I want to support - // the weird pasef modes. - quad_low: RollingSDCalculator::default(), - quad_high: RollingSDCalculator::default(), - } - } -} - -impl<'a> ClusterAggregator for PseudoSpectrumAggregator { - fn add( - &mut self, - peak: &BaseTrace, - ) { - debug_assert!(peak.intensity < u64::MAX - self.intensity); - - self.rt.add(peak.rt as f64, peak.intensity); - self.ims.add(peak.mobility as f64, peak.intensity); - self.quad_low.add(peak.quad_low, peak.intensity); - self.quad_high.add(peak.quad_high, peak.intensity); - self.peaks.push((peak.mz, peak.intensity)); - } - - fn aggregate(&self) -> PseudoSpectrum { - // TECHNICALLY this can error out if there are no elements... 
- let rt = self.rt.get_mean() as f32; - let ims = self.ims.get_mean() as f32; - let rt_skew = self.rt.get_skew() as f32; - let ims_skew = self.ims.get_skew() as f32; - let rt_std = self.rt.get_sd() as f32; - let ims_std = self.ims.get_sd() as f32; - let quad_low_high = (self.quad_low.get_mean(), self.quad_high.get_mean()); - - PseudoSpectrum { - peaks: self.peaks.clone(), - rt, - ims, - rt_min: self.rt.get_min().unwrap() as f32, - rt_max: self.rt.get_max().unwrap() as f32, - rt_std, - ims_std, - rt_skew, - ims_skew, - quad_low: quad_low_high.0, - quad_high: quad_low_high.1, - } - } - - fn combine( - self, - other: Self, - ) -> Self { - let mut peaks = self.peaks.clone(); - peaks.extend(other.peaks.clone()); - let mut rt = self.rt; - let mut ims = self.ims; - let mut quad_low = self.quad_low; - let mut quad_high = self.quad_high; - - rt.merge(&other.rt); - ims.merge(&other.ims); - quad_low.merge(&other.quad_low); - quad_high.merge(&other.quad_high); - - PseudoSpectrumAggregator { - peaks, - intensity: self.intensity + other.intensity, - rt, - ims, - quad_low, - quad_high, - } - } -} - -struct BaseTraceConverter { - rt_scaling: f64, - ims_scaling: f64, - quad_scaling: f64, -} - -impl NDPointConverter for BaseTraceConverter { - fn convert( - &self, - elem: &BaseTrace, - ) -> NDPoint<3> { - // let rt_start_use = (elem.rt - elem.rt_std).min(elem.rt - self.peak_width_prior as f32); - // let rt_end_use = (elem.rt + elem.rt_std).max(elem.rt + self.peak_width_prior as f32); - // let rt_start_end_scaling = self.rt_scaling * self.rt_start_end_ratio; - let quad_center = (elem.quad_low + elem.quad_high) / 2.; - NDPoint { - values: [ - (elem.rt as f64 / self.rt_scaling) as f32, - (elem.mobility as f64 / self.ims_scaling) as f32, - (quad_center as f64 / self.quad_scaling) as f32, - ], - } - } -} - -struct PseudoScanBackConverter { - rt_scaling: f64, - ims_scaling: f64, - quad_scaling: f64, -} - -impl NDPointConverter for PseudoScanBackConverter { - fn convert( - &self, - elem: 
&PseudoSpectrum, - ) -> NDPoint<3> { - let quad_mid = (elem.quad_low + elem.quad_high) / 2.; - NDPoint { - values: [ - (elem.rt as f64 / self.rt_scaling) as f32, - (elem.ims as f64 / self.ims_scaling) as f32, - (quad_mid as f64 / self.quad_scaling) as f32, - ], - } - } -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy)] -pub struct PseudoscanGenerationConfig { - pub rt_scaling: f32, - pub quad_scaling: f32, - pub ims_scaling: f32, - pub max_rt_expansion_ratio: f32, - pub max_quad_expansion_ratio: f32, - pub max_ims_expansion_ratio: f32, - pub min_n: u8, - pub min_neighbor_intensity: u32, -} - -impl Default for PseudoscanGenerationConfig { - fn default() -> Self { - PseudoscanGenerationConfig { - rt_scaling: 2.4, - quad_scaling: 5., - ims_scaling: 0.015, - max_rt_expansion_ratio: 5., - max_quad_expansion_ratio: 1., - max_ims_expansion_ratio: 2., - min_n: 6, - min_neighbor_intensity: 6000, - } - } -} - -impl IntenseAtIndex for Vec { - fn intensity_at_index( - &self, - index: usize, - ) -> u64 { - self[index].intensity - } - - fn intensity_index_length(&self) -> usize { - self.len() - } -} - -impl AsAggregableAtIndex for Vec { - fn get_aggregable_at_index( - &self, - index: usize, - ) -> BaseTrace { - self[index] - } - - fn num_aggregable(&self) -> usize { - self.len() - } -} - -struct BaseTraceDistance { - quad_diff: f32, - iou: f32, - cosine: f32, -} - -impl DistantAtIndex for Vec { - fn distance_at_indices( - &self, - index: usize, - other: usize, - ) -> BaseTraceDistance { - let quad_diff = (self[index].quad_center - self[other].quad_center).abs(); - let iou = self[index].rt_iou(&self[other]); - // Q: What can cause an error here?? 
- let cosine = self[index] - .chromatogram - .cosine_similarity(&self[other].chromatogram) - .unwrap(); - BaseTraceDistance { - quad_diff, - iou, - cosine, - } - } -} - -pub fn combine_pseudospectra( - traces: Vec, - config: PseudoscanGenerationConfig, -) -> Vec { - let mut timer = - utils::ContextTimer::new("Combining pseudospectra", true, utils::LogLevel::INFO); - - let converter = BaseTraceConverter { - rt_scaling: config.rt_scaling.into(), - ims_scaling: config.ims_scaling.into(), - quad_scaling: config.quad_scaling.into(), - // rt_start_end_ratio: 2., - // peak_width_prior: 0.75, - }; - - const IOU_THRESH: f32 = 0.1; - const COSINE_THRESH: f32 = 0.8; - let extra_filter_fun = |x: &BaseTraceDistance| { - let close_in_quad = (x.quad_diff).abs() < 5.0; - let within_iou_tolerance = x.iou > IOU_THRESH; - let within_cosine_tolerance = x.cosine > COSINE_THRESH; - - return close_in_quad && within_iou_tolerance && within_cosine_tolerance; - }; - - let back_converter = PseudoScanBackConverter { - rt_scaling: config.rt_scaling.into(), - ims_scaling: config.ims_scaling.into(), - quad_scaling: config.quad_scaling.into(), - }; - let max_extension_distances: [f32; 3] = [ - config.max_rt_expansion_ratio, - config.max_ims_expansion_ratio, - config.max_quad_expansion_ratio, - ]; - - let foo: Vec = dbscan_generic( - converter, - &traces, - config.min_n.into(), - config.min_neighbor_intensity.into(), - PseudoSpectrumAggregator::default, - Some(&extra_filter_fun), - Some(utils::LogLevel::INFO), - false, - &max_extension_distances, - Some(back_converter), - ); - - info!("Combined pseudospectra: {}", foo.len()); - timer.stop(true); - foo -} - -pub fn write_pseudoscans_json( - pseudocscans: &[PseudoSpectrum], - out_path: impl AsRef, -) -> Result<(), Box> { - info!( - "Writting pseudoscans to json: {}", - out_path.as_ref().display() - ); - let mut file = std::fs::File::create(out_path)?; - file.write("[".as_bytes())?; - let mut is_first = true; - for x in pseudocscans { - let json = 
serde_json::to_string(&x)?; - if is_first { - is_first = false; - } else { - file.write(",\n".as_bytes())?; - } - file.write(json.as_bytes())?; - } - file.write("]".as_bytes())?; - - Ok(()) -} - -// pub fn read_pseudoscans_json( -// in_path: impl AsRef, -// ) -> Result, Box> { -// info!("Reading pseudoscans from json {}", in_path.as_ref().display()); -// let file = std::fs::File::open(in_path)?; -// let reader = std::io::BufReader::new(file); -// let out: Vec = serde_json::from_reader(reader)?; -// Ok(out) -// } diff --git a/src/main.rs b/src/main.rs index 105cb54..463ac94 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ extern crate log; extern crate pretty_env_logger; use clap::Parser; +use log::debug; use crate::scoring::SageSearchConfig; use serde::{Deserialize, Serialize}; @@ -59,7 +60,7 @@ impl Default for OutputConfig { struct Config { denoise_config: aggregation::ms_denoise::DenoiseConfig, tracing_config: aggregation::tracing::TracingConfig, - pseudoscan_generation_config: aggregation::tracing::PseudoscanGenerationConfig, + pseudoscan_generation_config: aggregation::pseudospectra::PseudoscanGenerationConfig, sage_search_config: SageSearchConfig, output_config: OutputConfig, } @@ -134,28 +135,38 @@ fn main() { let mut traces = aggregation::tracing::combine_traces(dia_frames, config.tracing_config, cycle_time); - let out = match out_traces_path { - Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), - None => Ok(()), - }; - match out { - Ok(_) => {}, - Err(e) => { - log::warn!("Error writing traces: {:?}", e); - }, - } - - println!("traces: {:?}", traces.len()); - traces.retain(|x| x.num_agg > 3); - println!("traces: {:?}", traces.len()); - if traces.len() > 5 { - println!("sample_trace: {:?}", traces[traces.len() - 4]) + // let out = match out_traces_path { + // Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), + // None => Ok(()), + // }; + // match out { + // Ok(_) => {}, + // Err(e) => { + // 
log::warn!("Error writing traces: {:?}", e); + // }, + // } + + let num_traces = traces.len(); + for (i, trace) in traces.iter_mut().enumerate() { + debug!("trace {}/{}: {}", i, num_traces, trace.len()); + trace.retain(|x| x.num_agg > 3); + debug!( + "trace {}/{}: {} (after dopping too short)", + i, + num_traces, + trace.len() + ); + if trace.len() > 5 { + debug!("sample_trace: {:?}", trace[trace.len() - 4]) + } } // Maybe reparametrize as 1.1 cycle time // TODO add here expansion limits - let mut pseudoscans = - aggregation::tracing::combine_pseudospectra(traces, config.pseudoscan_generation_config); + let mut pseudoscans = aggregation::pseudospectra::combine_pseudospectra( + traces, + config.pseudoscan_generation_config, + ); // Report min/max/average/std and skew for ims and rt // This can probably be a macro ... @@ -189,7 +200,9 @@ fn main() { println!("npeaks: {:?}", npeaks); let out = match out_path_scans { - Some(out_path) => aggregation::tracing::write_pseudoscans_json(&pseudoscans, out_path), + Some(out_path) => { + aggregation::pseudospectra::write_pseudoscans_json(&pseudoscans, out_path) + }, None => Ok(()), }; diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index d9e1d89..f0e4334 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -5,8 +5,7 @@ use timsrust::{Frame, FrameType}; use crate::{ space::space_generics::{ - convert_to_bounds_query, AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, - QueriableIndexedPoints, + AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, }, utils::binary_search_slice, }; @@ -724,7 +723,7 @@ impl QueriableIndexedPoints<2> for ExpandedFrameSlice { &self, point: &NDPoint<2>, ) -> Vec { - let query = convert_to_bounds_query(point); + let query = self.convert_to_bounds_query(point); self.query_ndrange(&query.0, query.1) } diff --git a/src/scoring.rs b/src/scoring.rs index b4e9bea..edb8498 100644 --- a/src/scoring.rs +++ b/src/scoring.rs @@ 
-1,6 +1,6 @@ use std::str::FromStr; -use crate::aggregation::tracing::PseudoSpectrum; +use crate::aggregation::pseudospectra::PseudoSpectrum; use indicatif::ParallelProgressIterator; use log::warn; @@ -279,7 +279,7 @@ pub fn score_pseudospectra( let progbar = indicatif::ProgressBar::new(spectra.len() as u64); - log::info!("Scoring pseudospectra ..."); + log::info!("Scoring {} pseudospectra ...", spectra.len()); let mut features = spectra .par_iter() .progress_with(progbar) diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index cd806c0..52aa23d 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -79,7 +79,7 @@ impl NDBoundary { NDBoundary::new(starts, ends) } - pub fn expand( + pub fn expand_relative( &mut self, factors: &[f32; D], ) { @@ -95,6 +95,22 @@ impl NDBoundary { self.centers[i] = (self.ends[i] + self.starts[i]) / 2.0; } } + pub fn expand_absolute( + &mut self, + factors: &[f32; D], + ) { + for (i, ef) in factors.iter().enumerate() { + let new_start = self.starts[i] - ef; + let new_end = self.ends[i] + ef; + let new_center = (new_start + new_end) / 2.0; + let new_width = new_end - new_start; + + self.starts[i] = new_start; + self.ends[i] = new_end; + self.widths[i] = new_width; + self.centers[i] = new_center; + } + } } // #[derive(Debug, Clone, Copy)] @@ -108,12 +124,38 @@ pub trait QueriableIndexedPoints { fn query_ndpoint( &self, point: &NDPoint, - ) -> Vec; + ) -> Vec { + let (bounds, reference_point) = self.convert_to_bounds_query(point); + self.query_ndrange(&bounds, reference_point) + } fn query_ndrange( &self, boundary: &NDBoundary, reference_point: Option<&NDPoint>, ) -> Vec; + fn convert_to_bounds_query<'a>( + &'a self, + point: &'a NDPoint, + ) -> (NDBoundary, Option<&NDPoint>) { + let bounds = NDBoundary::new( + point + .values + .iter() + .map(|x| *x - 1.) + .collect::>() + .try_into() + .unwrap(), + point + .values + .iter() + .map(|x| *x + 1.) 
+ .collect::>() + .try_into() + .unwrap(), + ); + + (bounds, Some(point)) + } } pub trait AsNDPointsAtIndex { @@ -256,26 +298,3 @@ pub trait NDPointConverter { (points, boundary) } } - -pub fn convert_to_bounds_query<'a, const D: usize>( - point: &'a NDPoint -) -> (NDBoundary, Option<&'a NDPoint>) { - let bounds = NDBoundary::new( - point - .values - .iter() - .map(|x| *x - 1.) - .collect::>() - .try_into() - .unwrap(), - point - .values - .iter() - .map(|x| *x + 1.) - .collect::>() - .try_into() - .unwrap(), - ); - - (bounds, Some(point)) -} From 67db55ca91cd3a840a830ea32ad0ae7ebfdbaf7a Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 25 Jul 2024 00:42:54 -0700 Subject: [PATCH 22/26] (wip,chore) clippy and deny update --- .pre-commit-config.yaml | 4 ++ Cargo.lock | 12 ++-- deny.toml | 71 ++++--------------- src/aggregation/aggregators.rs | 3 +- src/aggregation/dbscan/dbscan.rs | 29 ++++---- src/aggregation/dbscan/denseframe_dbscan.rs | 16 ++--- src/aggregation/dbscan/runner.rs | 29 +++----- src/aggregation/ms_denoise.rs | 22 +++--- src/aggregation/pseudospectra.rs | 54 ++++++-------- .../queriable_indexed_points.rs | 40 +++++------ .../queriable_collections/queriable_traces.rs | 23 +++--- src/aggregation/tracing.rs | 16 ++--- src/main.rs | 2 +- src/ms/frames/frame_slice.rs | 9 ++- src/ms/frames/frame_slice_rt_window.rs | 26 +++---- src/space/kdtree.rs | 4 +- src/space/space_generics.rs | 10 ++- src/utils.rs | 34 +++++---- 18 files changed, 163 insertions(+), 241 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 614e968..67bb92a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,10 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: +- repo: https://github.com/EmbarkStudios/cargo-deny + rev: 0.14.16 # choose your preferred tag + hooks: + - id: cargo-deny - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.2.0 hooks: diff 
--git a/Cargo.lock b/Cargo.lock index 2defd14..10ad791 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -331,9 +331,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" [[package]] name = "cc" @@ -1439,9 +1439,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" dependencies = [ "bitflags 2.5.0", "cfg-if", @@ -1471,9 +1471,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", diff --git a/deny.toml b/deny.toml index 854ddb9..51e81af 100644 --- a/deny.toml +++ b/deny.toml @@ -9,7 +9,7 @@ # The values provided in this template are the default values that will be used # when any section or field is not specified in your own configuration -# Root options +[graph] # If 1 or more target triples (and optionally, target_features) are specified, # only the specified targets will be checked when running `cargo deny check`. @@ -50,6 +50,8 @@ no-default-features = false # If set, these feature will be enabled when collecting metadata. 
If `--features` # is specified on the cmd line they will take precedence over this option. #features = [] + +[output] # When outputting inclusion graphs in diagnostics that include features, this # option can be used to specify the depth at which feature edges will be added. # This option is included since the graphs can be quite large and the addition @@ -61,24 +63,18 @@ feature-depth = 1 # More documentation for the advisories section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html [advisories] +version = 2 # The path where the advisory database is cloned/fetched into db-path = "~/.cargo/advisory-db" # The url(s) of the advisory databases to use db-urls = ["https://github.com/rustsec/advisory-db"] # The lint level for security vulnerabilities -vulnerability = "deny" -# The lint level for unmaintained crates -unmaintained = "warn" -# The lint level for crates that have been yanked from their source registry yanked = "warn" -# The lint level for crates with security notices. Note that as of -# 2019-12-17 there are no security notice advisories in -# https://github.com/rustsec/advisory-db -notice = "warn" # A list of advisory IDs to ignore. Note that ignored advisories will still # output a note when they are encountered. ignore = [ #"RUSTSEC-0000-0000", + "RUSTSEC-2021-0145", # atty... potential unaligner when using custom allocators. ] # Threshold for security vulnerabilities, any vulnerability with a CVSS score # lower than the range specified will be ignored. 
Note that ignored advisories @@ -100,8 +96,7 @@ ignore = [ # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html [licenses] -# The lint level for crates which do not have a detectable license -unlicensed = "deny" +version = 2 # List of explicitly allowed licenses # See https://spdx.org/licenses/ for list of possible licenses # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. @@ -109,39 +104,12 @@ allow = [ "MIT", "Apache-2.0", "Apache-2.0 WITH LLVM-exception", - "CC0-1.0", "BSD-3-Clause", - "ISC", "Unicode-DFS-2016", "Zlib", # Thank Jesus for rerun that compiled the following for me... - "OFL-1.1", # https://spdx.org/licenses/OFL-1.1.html - "MPL-2.0", # https://www.mozilla.org/en-US/MPL/2.0/FAQ/ - see Q11. Used by webpki-roots on Linux. - "OpenSSL", # https://www.openssl.org/source/license.html - used on Linux - "LicenseRef-UFL-1.0", # See https://github.com/emilk/egui/issues/2321 -] -# List of explicitly disallowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -deny = [ - #"Nokia", ] -# Lint level for licenses considered copyleft -copyleft = "deny" -# Blanket approval or denial for OSI-approved or FSF Free/Libre licenses -# * both - The license will be approved if it is both OSI-approved *AND* FSF -# * either - The license will be approved if it is either OSI-approved *OR* FSF -# * osi - The license will be approved if it is OSI approved -# * fsf - The license will be approved if it is FSF Free -# * osi-only - The license will be approved if it is OSI-approved *AND NOT* FSF -# * fsf-only - The license will be approved if it is FSF *AND NOT* OSI-approved -# * neither - This predicate is ignored and the default lint level is used -allow-osi-fsf-free = "neither" -# Lint level used when no other predicates are matched -# 1. 
License isn't in the allow or deny lists -# 2. License isn't copyleft -# 3. License isn't OSI/FSF, or allow-osi-fsf-free = "neither" -default = "deny" + # The confidence threshold for detecting a license from license text. # The higher the value, the more closely the license text must be to the # canonical license text of a valid SPDX license file. @@ -149,11 +117,7 @@ default = "deny" confidence-threshold = 0.95 # Allow 1 or more licenses on a per-crate basis, so that particular licenses # aren't accepted for every possible crate as with the normal allow list -exceptions = [ - # Each entry is the crate and version constraint, and its specific allow - # list - { allow = ["Zlib"], name = "adler32", version = "*" }, -] +exceptions = [] # Some crates don't have (easily) machine readable licensing information, # adding a clarification entry for it allows you to manually specify the @@ -260,19 +224,12 @@ deny = [ #exact = true # Certain crates/versions that will be skipped when doing duplicate detection. -skip = [ - # Following line comes from the rerun repo ...https://github.com/rerun-io/rerun/blob/main/deny.toml - { name = "webpki-roots" }, # ureq and tungstenite are on different version 😭 - - #{ name = "ansi_term", version = "=0.11.0" }, -] +skip = [] # Similarly to `skip` allows you to skip certain crates during duplicate # detection. Unlike skip, it also includes the entire tree of transitive # dependencies starting at the specified crate, up to a certain depth, which is # by default infinite. -skip-tree = [ - #{ name = "ansi_term", version = "=0.11.0", depth = 20 }, -] +skip-tree = [] # This section is considered when running `cargo deny check sources`. # More documentation about the 'sources' section can be found here: @@ -288,12 +245,8 @@ unknown-git = "warn" # if not specified. If it is specified but empty, no registries are allowed. 
allow-registry = ["https://github.com/rust-lang/crates.io-index"] # List of URLs for allowed Git repositories -allow-git = [] +allow-git = ["https://github.com/lazear/sage"] [sources.allow-org] # 1 or more github.com organizations to allow git sources for -github = [""] -# 1 or more gitlab.com organizations to allow git sources for -gitlab = [""] -# 1 or more bitbucket.org organizations to allow git sources for -bitbucket = [""] +# github = [""] diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 6d4bdec..8a58358 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,7 +1,6 @@ use crate::ms::frames::TimsPeak; -use crate::space::space_generics::{AsAggregableAtIndex, HasIntensity, IntenseAtIndex}; +use crate::space::space_generics::{AsAggregableAtIndex, HasIntensity}; use crate::utils; -use std::ops::Index; use rayon::prelude::*; diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index 954a5bf..e0190bc 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,14 +1,14 @@ -use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator, ClusterLabel}; +use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; use crate::space::kdtree::RadiusKDTree; use crate::space::space_generics::{ AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPoint, NDPointConverter, QueriableIndexedPoints, }; use crate::utils::{self, ContextTimer}; -use log::{debug, info, trace}; +use log::info; use rayon::prelude::*; use std::fmt::Debug; -use std::ops::{Add, Index}; +use std::ops::Add; use crate::aggregation::dbscan::runner::dbscan_label_clusters; @@ -64,15 +64,13 @@ pub fn reassign_centroid< } // 1/1000 show the first and last neighbor, as well as the centroid - if neighbors.len() > 0 { - if rand::random::() < 0.001 { - println!( - "Centroid: {:?}, First: {:?}, Last: {:?}", - centroid, - 
neighbors[0], - neighbors[neighbors.len() - 1] - ); - } + if !neighbors.is_empty() && rand::random::() < 0.001 { + println!( + "Centroid: {:?}, First: {:?}, Last: {:?}", + centroid, + neighbors[0], + neighbors[neighbors.len() - 1] + ); } let mut aggregator = def_aggregator(); @@ -149,7 +147,7 @@ where let quad_indices = (0..ndpoints.len()).collect::>(); for (quad_point, i) in ndpoints.iter().zip(quad_indices.iter()) { - tree.insert_ndpoint(quad_point.clone(), i); + tree.insert_ndpoint(*quad_point, i); } i_timer.stop(true); @@ -234,13 +232,12 @@ pub fn dbscan_aggregate< ); i_timer.stop(true); - let centroids = aggregate_clusters( + aggregate_clusters( cluster_labels.num_clusters, cluster_labels.cluster_labels, prefiltered_peaks, &def_aggregator, log_level, keep_unclustered, - ); - centroids + ) } diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 39ecfc6..92cb3fa 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -95,15 +95,15 @@ impl AsAggregableAtIndex for Vec { impl DistantAtIndex for Vec { fn distance_at_indices( &self, - index: usize, - other: usize, + _index: usize, + _other: usize, ) -> f32 { panic!("I dont think this is called ever ..."); - let mut sum = 0.0; - let diff_mz = (self[index].mz - self[other].mz) as f32; - sum += diff_mz * diff_mz; - let diff_ims = self[index].mobility - self[other].mobility; - sum += diff_ims * diff_ims; - sum.sqrt() + // let mut sum = 0.0; + // let diff_mz = (self[index].mz - self[other].mz) as f32; + // sum += diff_mz * diff_mz; + // let diff_ims = self[index].mobility - self[other].mobility; + // sum += diff_ims * diff_ims; + // sum.sqrt() } } diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 74245fb..3bb8c58 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -5,7 +5,7 @@ use crate::space::space_generics::{ use crate::utils; use 
core::fmt::Debug; use indicatif::ProgressIterator; -use log::{debug, trace}; +use log::debug; use std::marker::PhantomData; use std::sync::Arc; @@ -181,10 +181,7 @@ impl DBSCANRunnerState { { let cluster_labels = ClusterLabels::new(nlabels); - let filter_fun_cache = match usize_filterfun { - Some(_) => Some(FilterFunCache::new(nlabels)), - None => None, - }; + let filter_fun_cache = usize_filterfun.map(|_| FilterFunCache::new(nlabels)); //FilterFunCache::new(Box::new(&usize_filterfun), nlabels); let timers = DBScanTimers::new(); let candidate_metrics = CandidateCountMetrics::new(); @@ -374,8 +371,8 @@ where let cl = |a: &usize, b: &usize| { filterfun(&raw_distance_calculator.distance_at_indices(*a, *b)) }; - let bind = Some(cl); - bind + + Some(cl) }, None => None, }; @@ -388,7 +385,7 @@ where let points: DBSCANPoints = DBSCANPoints { raw_elements, - intensity_sorted_indices: intensity_sorted_indices, + intensity_sorted_indices, indexed_points, projected_elements, raw_dist: raw_distance_calculator, @@ -591,7 +588,7 @@ where .map(|i| points.intensity_at_index(*i)) .sum::(); timers.outer_intensity_calculation.stop(false); - return neighbor_intensity_total >= self.min_intensity; + neighbor_intensity_total >= self.min_intensity } fn main_loop_expand_cluster( @@ -661,11 +658,7 @@ where { timers.inner_loop_nn_timer.reset_start(); let binding = Arc::clone(&points).get_ndpoint(neighbor_index); - let local_neighbors: Vec = points - .query_ndpoint(&binding) - .iter() - .map(|x| *x) - .collect::>(); + let local_neighbors: Vec = points.query_ndpoint(&binding).to_vec(); // Should I warn if nothing is gotten here? // every point should have at least itself as a neighbor ... 
debug_assert!(!local_neighbors.is_empty()); @@ -862,18 +855,16 @@ pub fn dbscan_label_clusters< min_n, min_intensity, progress, - filter_fun: filter_fun, + filter_fun, max_extension_distances, _phantom: PhantomData::, }; - let cluster_labels = runner.run( + runner.run( raw_elements, intensity_sorted_indices, indexed_points, projected_elements, raw_elements, - ); - - cluster_labels + ) } diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 763d67e..58e2986 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -151,19 +151,19 @@ fn denoise_frame_slice_window( frameslice_window: &[ExpandedFrameSlice], ims_converter: &timsrust::Scan2ImConverter, mz_converter: &timsrust::Tof2MzConverter, - dia_frame_info: &DIAFrameInfo, + _dia_frame_info: &DIAFrameInfo, min_n: usize, min_intensity: u64, - mz_scaling: f64, - max_mz_extension: f64, - ims_scaling: f32, - max_ims_extension: f32, + _mz_scaling: f64, + _max_mz_extension: f64, + _ims_scaling: f32, + _max_ims_extension: f32, ) -> DenseFrameWindow { let timer = utils::ContextTimer::new("dbscan_dfs", true, utils::LogLevel::TRACE); let fsw = FrameSliceWindow::new(frameslice_window); let ref_frame_parent_index = fsw.window[fsw.reference_index].parent_frame_index; let saved_first = - maybe_save_json_if_debugging(&fsw, &*format!("fsw_{}", ref_frame_parent_index), false); + maybe_save_json_if_debugging(&fsw, &format!("fsw_{}", ref_frame_parent_index), false); let mut intensity_sorted_indices = Vec::with_capacity(fsw.num_ndpoints()); for i in 0..fsw.num_ndpoints() { @@ -216,11 +216,11 @@ fn denoise_frame_slice_window( MsMsFrameSliceWindowInfo::SingleWindow(x) => x.global_quad_row_id, }; let min_mz = match slice_info { - MsMsFrameSliceWindowInfo::WindowGroup(x) => 0.0, + MsMsFrameSliceWindowInfo::WindowGroup(_x) => 0.0, MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_start, }; let max_mz = match slice_info { - MsMsFrameSliceWindowInfo::WindowGroup(x) => 0.0, + 
MsMsFrameSliceWindowInfo::WindowGroup(_x) => 0.0, MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_end, }; @@ -256,11 +256,11 @@ fn denoise_frame_slice_window( mz_start: min_mz as f64, mz_end: max_mz as f64, group_id: quad_group_id, - quad_group_id: quad_group_id, + quad_group_id, }; maybe_save_json_if_debugging( &out, - &*format!("dfw_out_{}", ref_frame_parent_index), + &format!("dfw_out_{}", ref_frame_parent_index), saved_first, ); @@ -456,7 +456,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> let mut denoised_elements: Vec = if cfg!(feature = "less_parallel") { warn!("Running in less parallel mode"); - sv.into_iter() + sv.iter() .map(|x| ExpandedFrameSlice::from_frame_slice(x)) .collect::>() .windows(3) diff --git a/src/aggregation/pseudospectra.rs b/src/aggregation/pseudospectra.rs index 838a0ab..849d354 100644 --- a/src/aggregation/pseudospectra.rs +++ b/src/aggregation/pseudospectra.rs @@ -1,30 +1,21 @@ -use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; -use crate::aggregation::chromatograms::{ - BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, -}; -use crate::aggregation::dbscan::dbscan::{dbscan_aggregate, dbscan_generic, reassign_centroid}; -use crate::aggregation::dbscan::runner::dbscan_label_clusters; -use crate::aggregation::queriable_collections::queriable_indexed_points::{ - QueriableTimeTimsPeaks, TimeTimsPeakScaling, -}; +use crate::aggregation::aggregators::ClusterAggregator; + +use crate::aggregation::dbscan::dbscan::{dbscan_aggregate, reassign_centroid}; + use crate::aggregation::queriable_collections::queriable_traces::{ BaseTraceDistance, TraceScalings, }; use crate::aggregation::queriable_collections::QueriableTraces; -use crate::ms::frames::DenseFrameWindow; -use crate::space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, NDPoint, - NDPointConverter, QueriableIndexedPoints, TraceLike, -}; -use crate::space::space_generics::{IntenseAtIndex, 
NDBoundary}; + +use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter}; + use crate::utils; -use crate::utils::{binary_search_slice, RollingSDCalculator}; +use crate::utils::RollingSDCalculator; + +use log::info; -use core::panic; -use log::{debug, error, info, warn}; -use rayon::iter::IntoParallelIterator; use rayon::prelude::*; -use serde::ser::{SerializeStruct, Serializer}; + use serde::{Deserialize, Serialize}; use std::error::Error; use std::io::Write; @@ -66,7 +57,7 @@ impl NDPointConverter for PseudoscanGenerationConfig { ) -> NDPoint<2> { // let quad_mid = (elem.quad_low + elem.quad_high) / 2.; NDPoint { - values: [elem.rt as f32, elem.ims as f32], + values: [elem.rt, elem.ims], } } } @@ -77,8 +68,7 @@ pub fn combine_pseudospectra( ) -> Vec { traces .into_iter() - .map(|x| combine_single_pseudospectra_window(x, config.clone())) - .flatten() + .flat_map(|x| combine_single_pseudospectra_window(x, config)) .collect() } @@ -104,7 +94,7 @@ pub fn combine_single_pseudospectra_window( let within_iou_tolerance = x.iou > IOU_THRESH; let within_cosine_tolerance = x.cosine > COSINE_THRESH; - return close_in_quad && within_iou_tolerance && within_cosine_tolerance; + close_in_quad && within_iou_tolerance && within_cosine_tolerance }; let max_extension_distances: [f32; 2] = [ @@ -148,15 +138,15 @@ pub fn combine_single_pseudospectra_window( let quad_diff_b = (p.quad_low - b.quad_low).abs(); let diff = rt_diff + ims_diff + quad_diff; let diff_b = rt_diff_b + ims_diff_b + quad_diff_b; - let out = diff.total_cmp(&diff_b); - out + + diff.total_cmp(&diff_b) }; let agg2 = reassign_centroid( agg1, &qtt, - config.clone(), + config, &qtt, - &PseudoSpectrumAggregator::default, + PseudoSpectrumAggregator::default, utils::LogLevel::INFO, &reassign_max_distances, Some(300), @@ -177,18 +167,18 @@ pub fn write_pseudoscans_json( out_path.as_ref().display() ); let mut file = std::fs::File::create(out_path)?; - file.write("[".as_bytes())?; + 
file.write_all("[".as_bytes())?; let mut is_first = true; for x in pseudocscans { let json = serde_json::to_string(&x)?; if is_first { is_first = false; } else { - file.write(",\n".as_bytes())?; + file.write_all(",\n".as_bytes())?; } - file.write(json.as_bytes())?; + file.write_all(json.as_bytes())?; } - file.write("]".as_bytes())?; + file.write_all("]".as_bytes())?; Ok(()) } diff --git a/src/aggregation/queriable_collections/queriable_indexed_points.rs b/src/aggregation/queriable_collections/queriable_indexed_points.rs index 0adb510..710082d 100644 --- a/src/aggregation/queriable_collections/queriable_indexed_points.rs +++ b/src/aggregation/queriable_collections/queriable_indexed_points.rs @@ -73,11 +73,11 @@ impl QueriableTimeTimsPeaks { // Check every 100 random queries ... if rand::random::() % 100 == 0 { let mut last_rt = 0.; - for i in 0..tmp.len() { - if tmp[i].rt < last_rt { + for item in tmp { + if item.rt < last_rt { panic!("RTs are not sorted within the bucket"); } - last_rt = tmp[i].rt; + last_rt = item.rt; } } } @@ -91,12 +91,10 @@ impl QueriableTimeTimsPeaks { indices.par_sort_unstable_by_key(|&x| x.1); debug_assert!(indices.len() == self.peaks.len()); - if cfg!(debug_assertions) { - if indices.len() > 1 { - for i in 1..indices.len() { - if indices[i - 1].1 > indices[i].1 { - panic!("Indices are not sorted"); - } + if cfg!(debug_assertions) && indices.len() > 1 { + for i in 1..indices.len() { + if indices[i - 1].1 > indices[i].1 { + panic!("Indices are not sorted"); } } } @@ -158,8 +156,8 @@ impl DistantAtIndex for QueriableTimeTimsPeaks { let a = self.peaks[index]; let b = self.peaks[other]; let mz = (a.mz - b.mz) as f32 / self.scalings.mz_scaling; - let rt = (a.rt - b.rt) as f32 / self.scalings.rt_scaling; - let ims = (a.ims - b.ims) as f32 / self.scalings.ims_scaling; + let rt = (a.rt - b.rt) / self.scalings.rt_scaling; + let ims = (a.ims - b.ims) / self.scalings.ims_scaling; (mz * mz + rt * rt + ims * ims).sqrt() } } @@ -181,18 +179,18 @@ impl 
QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { (point.values[2] + self.scalings.ims_scaling) + f32::EPSILON, ], ); - let out = self.query_ndrange(&boundary, None); - out + + self.query_ndrange(&boundary, None) } fn query_ndrange( &self, boundary: &NDBoundary<3>, - reference_point: Option<&NDPoint<3>>, + _reference_point: Option<&NDPoint<3>>, ) -> Vec { let mut out = Vec::new(); let mz_range = (boundary.starts[0], boundary.ends[0]); - let mz_range_f64 = (boundary.starts[0] as f64, boundary.ends[0] as f64); + let _mz_range_f64 = (boundary.starts[0] as f64, boundary.ends[0] as f64); let rt_range = (boundary.starts[1], boundary.ends[1]); let ims_range = (boundary.starts[2], boundary.ends[2]); @@ -215,7 +213,7 @@ impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { let page_start = bnum * self.bucket_size; let (istart, iend) = - binary_search_slice(c_bucket, |a, b| a.rt.total_cmp(&b), rt_range.0, rt_range.1); + binary_search_slice(c_bucket, |a, b| a.rt.total_cmp(b), rt_range.0, rt_range.1); for (j, peak) in self.peaks[(istart + page_start)..(iend + page_start)] .iter() @@ -236,10 +234,12 @@ impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { .map(|x| x.rt) .collect::>() ); - if peak.ims >= ims_range.0 && peak.ims <= ims_range.1 { - if peak.mz as f32 >= mz_range.0 && peak.mz as f32 <= mz_range.1 { - out.push(j + istart + page_start); - } + if peak.ims >= ims_range.0 + && peak.ims <= ims_range.1 + && peak.mz as f32 >= mz_range.0 + && peak.mz as f32 <= mz_range.1 + { + out.push(j + istart + page_start); } } } diff --git a/src/aggregation/queriable_collections/queriable_traces.rs b/src/aggregation/queriable_collections/queriable_traces.rs index d1bfdbb..fcddbde 100644 --- a/src/aggregation/queriable_collections/queriable_traces.rs +++ b/src/aggregation/queriable_collections/queriable_traces.rs @@ -7,7 +7,7 @@ pub use crate::{ }, utils::binary_search_slice, }; -use log::{debug, info}; +use log::info; use rayon::prelude::*; #[derive(Debug)] @@ -70,13 
+70,11 @@ impl QueriableTraces { let page_end = (page_start + self.bucket_size).min(self.traces.len()); let tmp = &self.traces[page_start..page_end]; - if cfg!(debug_assertions) { - if rand::random::() % 100 == 0 { - // Make sure all rts are sorted within the bucket - for i in 1..tmp.len() { - if tmp[i - 1].mobility > tmp[i].mobility { - panic!("RTs are not sorted within the bucket"); - } + if cfg!(debug_assertions) && rand::random::() % 100 == 0 { + // Make sure all rts are sorted within the bucket + for i in 1..tmp.len() { + if tmp[i - 1].mobility > tmp[i].mobility { + panic!("RTs are not sorted within the bucket"); } } } @@ -98,10 +96,9 @@ impl AsNDPointsAtIndex<2> for QueriableTraces { &self, index: usize, ) -> NDPoint<2> { - let out = NDPoint { + NDPoint { values: [self.traces[index].rt, self.traces[index].mobility], - }; - out + } } fn num_ndpoints(&self) -> usize { self.traces.len() @@ -221,7 +218,7 @@ impl QueriableIndexedPoints<2> for QueriableTraces { let (ibstart, ibend) = binary_search_slice( bucket, - |a, b| a.mobility.partial_cmp(&b).unwrap(), + |a, b| a.mobility.partial_cmp(b).unwrap(), start_ims, end_ims, ); @@ -246,7 +243,7 @@ impl QueriableIndexedPoints<2> for QueriableTraces { } } - if out.len() == 0 { + if out.is_empty() { info!( "No traces found for query: \n{:?} -> {:?}\n", boundary, reference_point diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index 322564b..c44c639 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -2,23 +2,19 @@ use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; use crate::aggregation::chromatograms::{ BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, }; -use crate::aggregation::dbscan::dbscan::dbscan_generic; + use crate::aggregation::dbscan::runner::dbscan_label_clusters; use crate::aggregation::queriable_collections::queriable_indexed_points::{ QueriableTimeTimsPeaks, TimeTimsPeakScaling, }; -use 
crate::aggregation::queriable_collections::queriable_traces::BaseTraceDistance; + use crate::ms::frames::DenseFrameWindow; -use crate::space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, NDPoint, - NDPointConverter, QueriableIndexedPoints, TraceLike, -}; -use crate::space::space_generics::{IntenseAtIndex, NDBoundary}; +use crate::space::space_generics::{HasIntensity, TraceLike}; + use crate::utils; -use crate::utils::{binary_search_slice, RollingSDCalculator}; +use crate::utils::RollingSDCalculator; -use core::panic; -use log::{debug, error, info, warn}; +use log::{debug, info, warn}; use rayon::iter::IntoParallelIterator; use rayon::prelude::*; use serde::ser::{SerializeStruct, Serializer}; diff --git a/src/main.rs b/src/main.rs index 463ac94..7511263 100644 --- a/src/main.rs +++ b/src/main.rs @@ -114,7 +114,7 @@ fn main() { .debug_scans_json .as_ref() .map(|path| out_path_dir.join(path).to_path_buf()); - let out_traces_path = config + let _out_traces_path = config .output_config .debug_traces_csv .as_ref() diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index f0e4334..5855e8f 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -1,4 +1,3 @@ -use log::info; use serde::Serialize; use std::fmt; use timsrust::{Frame, FrameType}; @@ -379,7 +378,7 @@ impl<'a> FrameSlice<'a> { debug_assert!(!ranges.any_overlap()); } - if ranges.ranges.len() == 0 { + if ranges.ranges.is_empty() { None } else { Some(ranges) @@ -410,7 +409,7 @@ impl ExpandedFrameSlice { .zip(scan_numbers.iter()) .collect::>(); - zipped.sort_unstable_by(|a, b| a.0 .0.cmp(&b.0 .0)); + zipped.sort_unstable_by(|a, b| a.0 .0.cmp(b.0 .0)); let (tof_indices, intensities, scan_numbers) = zipped.into_iter().fold( (Vec::new(), Vec::new(), Vec::new()), @@ -730,7 +729,7 @@ impl QueriableIndexedPoints<2> for ExpandedFrameSlice { fn query_ndrange( &self, boundary: &NDBoundary<2>, - reference_point: Option<&NDPoint<2>>, + 
_reference_point: Option<&NDPoint<2>>, ) -> Vec { // TODO implement passing information on the mz tolerance ... // info!("Querying frame slice with boundary: {:?}", boundary); @@ -789,7 +788,7 @@ impl<'a> QueriableIndexedPoints<2> for FrameSlice<'a> { fn query_ndrange( &self, boundary: &NDBoundary<2>, - reference_point: Option<&NDPoint<2>>, + _reference_point: Option<&NDPoint<2>>, ) -> Vec { // TODO implement passing information on the mz tolerance ... // info!("Querying frame slice with boundary: {:?}", boundary); diff --git a/src/ms/frames/frame_slice_rt_window.rs b/src/ms/frames/frame_slice_rt_window.rs index 69565cf..ef881bc 100644 --- a/src/ms/frames/frame_slice_rt_window.rs +++ b/src/ms/frames/frame_slice_rt_window.rs @@ -10,7 +10,7 @@ use crate::{ }, }; -use super::{ExpandedFrameSlice, FrameSlice, TimsPeak}; +use super::{ExpandedFrameSlice, TimsPeak}; #[derive(Debug, Serialize)] pub struct FrameSliceWindow<'a> { @@ -28,7 +28,7 @@ pub struct MaybeIntenseRawPeak { } impl FrameSliceWindow<'_> { - pub fn new<'a>(window: &'a [ExpandedFrameSlice]) -> FrameSliceWindow<'a> { + pub fn new(window: &[ExpandedFrameSlice]) -> FrameSliceWindow<'_> { let cum_lengths = window .iter() .map(|x| x.num_ndpoints()) @@ -59,7 +59,7 @@ impl FrameSliceWindow<'_> { } debug_assert!( - index < self.cum_lengths.last().unwrap().clone(), + index < *self.cum_lengths.last().unwrap(), "Index out of bounds, generated index: {}, pos: {}, cum_lengths: {:?}", index, pos, @@ -88,17 +88,17 @@ impl AsAggregableAtIndex for FrameSliceWindow<'_> { let tof = tmp.tof_indices[within_window_index]; let int = tmp.intensities[within_window_index]; let scan = tmp.scan_numbers[within_window_index]; - let foo = MaybeIntenseRawPeak { + + MaybeIntenseRawPeak { intensity: int, tof_index: tof, scan_index: scan, weight_only: pos != self.reference_index, - }; - foo + } } fn num_aggregable(&self) -> usize { - self.cum_lengths.last().unwrap().clone() + *self.cum_lengths.last().unwrap() } } @@ -124,7 +124,7 @@ impl 
IntenseAtIndex for FrameSliceWindow<'_> { } fn intensity_index_length(&self) -> usize { - self.cum_lengths.last().unwrap().clone() + *self.cum_lengths.last().unwrap() } } @@ -135,7 +135,7 @@ impl<'a> QueriableIndexedPoints<2> for FrameSliceWindow<'a> { ) -> Vec { let mut out = Vec::new(); let mut last_cum_length = 0; - for (i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() + for (_i, (frame, cum_length)) in self.window.iter().zip(self.cum_lengths.iter()).enumerate() { let local_outs = frame.query_ndpoint(point); for ii in local_outs { @@ -152,7 +152,7 @@ impl<'a> QueriableIndexedPoints<2> for FrameSliceWindow<'a> { reference_point: Option<&NDPoint<2>>, ) -> Vec { let mut out = Vec::new(); - let last = self.cum_lengths.last().unwrap().clone(); + let last = *self.cum_lengths.last().unwrap(); let mut last_cum_length = 0; for (frame, cum_length) in self.window.iter().zip(self.cum_lengths.iter()) { let local_outs = frame.query_ndrange(boundary, reference_point); @@ -174,8 +174,8 @@ impl DistantAtIndex for FrameSliceWindow<'_> { index: usize, other: usize, ) -> f32 { - let (pos, within_window_index) = self.get_window_index(index); - let (pos_other, within_window_index_other) = self.get_window_index(other); + let (_pos, _within_window_index) = self.get_window_index(index); + let (_pos_other, _within_window_index_other) = self.get_window_index(other); panic!("unimplemented"); 0. 
} @@ -191,7 +191,7 @@ impl AsNDPointsAtIndex<2> for FrameSliceWindow<'_> { } fn num_ndpoints(&self) -> usize { - self.cum_lengths.last().unwrap().clone() + *self.cum_lengths.last().unwrap() } } diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index fa4f348..0a79974 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -265,7 +265,7 @@ impl<'a, const D: usize> QueriableIndexedPoints for RadiusKDTree<'a, usize, D &self, point: &NDPoint, ) -> Vec { - self.query(point).into_iter().map(|x| *x).collect() + self.query(point).into_iter().copied().collect() } fn query_ndrange( @@ -276,7 +276,7 @@ impl<'a, const D: usize> QueriableIndexedPoints for RadiusKDTree<'a, usize, D let candidates = self.query_range(boundary); if let Some(point) = reference_point { let tmp = self.refine_query(point, candidates); - tmp.into_iter().map(|x| *x).collect() + tmp.into_iter().copied().collect() } else { candidates.iter().map(|x| *x.1).collect() } diff --git a/src/space/space_generics.rs b/src/space/space_generics.rs index 52aa23d..768457a 100644 --- a/src/space/space_generics.rs +++ b/src/space/space_generics.rs @@ -205,12 +205,10 @@ pub trait IntenseAtIndex { indices.par_sort_unstable_by_key(|&x| x.1); debug_assert!(indices.len() == self.intensity_index_length()); - if cfg!(debug_assertions) { - if indices.len() > 1 { - for i in 1..indices.len() { - if indices[i - 1].1 > indices[i].1 { - panic!("Indices are not sorted"); - } + if cfg!(debug_assertions) && indices.len() > 1 { + for i in 1..indices.len() { + if indices[i - 1].1 > indices[i].1 { + panic!("Indices are not sorted"); } } } diff --git a/src/utils.rs b/src/utils.rs index 8d8b705..320df2c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -500,24 +500,22 @@ where let freq = std::env::var("IONMESH_DEBUG_JSON_FREQUENCY"); if let Ok(freq) = freq { let freq = freq.parse::().unwrap(); - if force || (freq > 0) { - if force || (rand::random::() % freq == 0) { - let json = serde_json::to_string_pretty(obj).unwrap(); - let path = 
std::env::var("IONMESH_DEBUG_JSON_PATH"); - let path = if let Ok(path) = path { - if !std::path::Path::new(&path).exists() { - std::fs::create_dir_all(&path).unwrap(); - } - std::path::Path::new(&path).join(format!("{}.json", name)) - } else { - warn!("IONMESH_DEBUG_JSON_PATH not set, saving to current directory"); - std::path::Path::new(".").join(format!("{}.json", name)) - }; - info!("Saving json to {:?}", path); - - std::fs::write(path, json).unwrap(); - return true; - } + if (force || (freq > 0)) && (force || (rand::random::() % freq == 0)) { + let json = serde_json::to_string_pretty(obj).unwrap(); + let path = std::env::var("IONMESH_DEBUG_JSON_PATH"); + let path = if let Ok(path) = path { + if !std::path::Path::new(&path).exists() { + std::fs::create_dir_all(&path).unwrap(); + } + std::path::Path::new(&path).join(format!("{}.json", name)) + } else { + warn!("IONMESH_DEBUG_JSON_PATH not set, saving to current directory"); + std::path::Path::new(".").join(format!("{}.json", name)) + }; + info!("Saving json to {:?}", path); + + std::fs::write(path, json).unwrap(); + return true; } } } From eab0fe2be4bce9b0dd76c8e4ca523e464234d320 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 25 Jul 2024 06:18:33 -0700 Subject: [PATCH 23/26] (chore) moved to nightly cargo fmt --- .pre-commit-config.yaml | 2 +- rustfmt.toml | 13 ++-- src/aggregation/aggregators.rs | 6 +- src/aggregation/chromatograms.rs | 16 +++-- src/aggregation/converters.rs | 6 +- src/aggregation/dbscan/dbscan.rs | 31 +++++++--- src/aggregation/dbscan/denseframe_dbscan.rs | 16 ++++- src/aggregation/dbscan/runner.rs | 20 ++++--- src/aggregation/ms_denoise.rs | 60 ++++++++++++------- src/aggregation/pseudospectra.rs | 39 ++++++------ .../queriable_indexed_points.rs | 23 ++++--- .../queriable_collections/queriable_traces.rs | 22 ++++--- src/aggregation/tracing.rs | 49 ++++++++++----- src/main.rs | 10 +++- src/ms/frames/dense_frame_window.rs | 24 +++++--- src/ms/frames/frame_slice.rs | 19 +++--- src/ms/frames/frame_slice_rt_window.rs | 31 ++++++---- src/ms/frames/frames.rs | 10 +++- src/ms/frames/mod.rs | 17 +++++- src/ms/tdf.rs | 31 +++++++--- src/scoring.rs | 47 +++++++++------ src/space/kdtree.rs | 7 ++- src/space/quad.rs | 8 ++- src/utils.rs | 19 ++++-- tests/test_window_parsing.rs | 5 +- 25 files changed, 355 insertions(+), 176 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67bb92a..cab56ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,6 @@ repos: - id: rustfmt name: rustfmt description: Check if all files follow the rustfmt style - entry: cargo fmt --all -- --check --color always + entry: cargo +nightly fmt --all -- --check --color always language: system pass_filenames: false diff --git a/rustfmt.toml b/rustfmt.toml index d966bd7..dea4d29 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -3,10 +3,9 @@ fn_params_layout = "Vertical" match_block_trailing_comma = true newline_style = "Unix" -## Unstable features :( -# group_imports = "StdExternalCrate" -# imports_granularity = "Module" -# imports_layout = "Vertical" -# merge_imports = true -# format_strings = true -# struct_lit_single_line = 
false +## Unstable features :( require nightly toolchain +group_imports = "StdExternalCrate" +imports_granularity = "Module" +imports_layout = "Vertical" +format_strings = true +struct_lit_single_line = false diff --git a/src/aggregation/aggregators.rs b/src/aggregation/aggregators.rs index 8a58358..47eaa6b 100644 --- a/src/aggregation/aggregators.rs +++ b/src/aggregation/aggregators.rs @@ -1,9 +1,9 @@ +use rayon::prelude::*; + use crate::ms::frames::TimsPeak; -use crate::space::space_generics::{AsAggregableAtIndex, HasIntensity}; +use crate::space::space_generics::AsAggregableAtIndex; use crate::utils; -use rayon::prelude::*; - // I Dont really like having this here but I am not sure where else to // define it ... since its needed by the aggregation functions #[derive(Debug, PartialEq, Clone, Copy)] diff --git a/src/aggregation/chromatograms.rs b/src/aggregation/chromatograms.rs index 37c7b2d..ba7b0b5 100644 --- a/src/aggregation/chromatograms.rs +++ b/src/aggregation/chromatograms.rs @@ -1,9 +1,13 @@ +use std::collections::BTreeMap; +use std::ops::{ + Add, + AddAssign, + Mul, +}; + use log::warn; use num_traits::AsPrimitive; -use std::collections::BTreeMap; -use std::ops::{Add, AddAssign, Mul}; - // Needs to be odd pub const NUM_LOCAL_CHROMATOGRAM_BINS: usize = 21; @@ -205,10 +209,12 @@ impl BTreeChromatogram { // array if curr_width > max_chr_arr_width * 2. 
{ warn!( - "Warning: Chromatogram range is larger than 2x the width of the chromatogram array {} vs {} at RT: {}", + "Warning: Chromatogram range is larger than 2x the width of the chromatogram \ + array {} vs {} at RT: {}", curr_width, max_chr_arr_width, - out.rt_bin_offset.unwrap()); + out.rt_bin_offset.unwrap() + ); let arr_intensities = out.total_intensity(); let btree_intensities = self.total_intensity() as f32; let ratio = arr_intensities / btree_intensities; diff --git a/src/aggregation/converters.rs b/src/aggregation/converters.rs index d1a5c41..3fcf2bb 100644 --- a/src/aggregation/converters.rs +++ b/src/aggregation/converters.rs @@ -1,6 +1,8 @@ use crate::ms::frames::TimsPeak; -use crate::space::space_generics::NDPoint; -use crate::space::space_generics::NDPointConverter; +use crate::space::space_generics::{ + NDPoint, + NDPointConverter, +}; // https://github.com/rust-lang/rust/issues/35121 // The never type is not stable yet.... diff --git a/src/aggregation/dbscan/dbscan.rs b/src/aggregation/dbscan/dbscan.rs index e0190bc..72fdb77 100644 --- a/src/aggregation/dbscan/dbscan.rs +++ b/src/aggregation/dbscan/dbscan.rs @@ -1,16 +1,29 @@ -use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; -use crate::space::kdtree::RadiusKDTree; -use crate::space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, HasIntensity, IntenseAtIndex, NDPoint, - NDPointConverter, QueriableIndexedPoints, -}; -use crate::utils::{self, ContextTimer}; -use log::info; -use rayon::prelude::*; use std::fmt::Debug; use std::ops::Add; +use log::info; +use rayon::prelude::*; + +use crate::aggregation::aggregators::{ + aggregate_clusters, + ClusterAggregator, +}; use crate::aggregation::dbscan::runner::dbscan_label_clusters; +use crate::space::kdtree::RadiusKDTree; +use crate::space::space_generics::{ + AsAggregableAtIndex, + AsNDPointsAtIndex, + DistantAtIndex, + HasIntensity, + IntenseAtIndex, + NDPoint, + NDPointConverter, + 
QueriableIndexedPoints, +}; +use crate::utils::{ + self, + ContextTimer, +}; // Pretty simple function ... it uses every passed centroid, converts it to a point // and generates a new centroid that aggregates all the points in its range. diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 92cb3fa..87feca7 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -1,8 +1,18 @@ use crate::aggregation::aggregators::TimsPeakAggregator; -use crate::aggregation::converters::{BypassDenseFrameBackConverter, DenseFrameConverter}; +use crate::aggregation::converters::{ + BypassDenseFrameBackConverter, + DenseFrameConverter, +}; use crate::aggregation::dbscan::dbscan::dbscan_generic; -use crate::ms::frames::{DenseFrame, TimsPeak}; -use crate::space::space_generics::{AsAggregableAtIndex, DistantAtIndex, IntenseAtIndex}; +use crate::ms::frames::{ + DenseFrame, + TimsPeak, +}; +use crate::space::space_generics::{ + AsAggregableAtIndex, + DistantAtIndex, + IntenseAtIndex, +}; use crate::utils::within_distance_apply; // bool> diff --git a/src/aggregation/dbscan/runner.rs b/src/aggregation/dbscan/runner.rs index 3bb8c58..bcbf40b 100644 --- a/src/aggregation/dbscan/runner.rs +++ b/src/aggregation/dbscan/runner.rs @@ -1,16 +1,22 @@ -use crate::space::space_generics::AsAggregableAtIndex; -use crate::space::space_generics::{ - AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, -}; -use crate::utils; use core::fmt::Debug; -use indicatif::ProgressIterator; -use log::debug; use std::marker::PhantomData; use std::sync::Arc; +use indicatif::ProgressIterator; +use log::debug; + use crate::aggregation::aggregators::ClusterLabel; use crate::aggregation::dbscan::utils::FilterFunCache; +use crate::space::space_generics::{ + AsAggregableAtIndex, + AsNDPointsAtIndex, + DistantAtIndex, + IntenseAtIndex, + NDBoundary, + NDPoint, + 
QueriableIndexedPoints, +}; +use crate::utils; /// Density-based spatial clustering of applications with noise (DBSCAN) /// diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index 58e2986..e41125f 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,31 +1,44 @@ use core::fmt::Debug; use core::panic; -use serde::{Deserialize, Serialize}; - -use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; -use crate::ms::frames::frame_slice_rt_window::FrameSliceWindow; -use crate::ms::frames::frame_slice_rt_window::RawWeightedTimsPeakAggregator; -use crate::ms::frames::Converters; -use crate::ms::frames::DenseFrame; -use crate::ms::frames::DenseFrameWindow; -use crate::ms::frames::ExpandedFrameSlice; -use crate::ms::frames::FrameSlice; -use crate::ms::frames::MsMsFrameSliceWindowInfo; -use crate::ms::frames::TimsPeak; -use crate::ms::tdf; -use crate::ms::tdf::DIAFrameInfo; -use crate::space::space_generics::AsNDPointsAtIndex; -use crate::space::space_generics::IntenseAtIndex; -use crate::utils; -use crate::utils::maybe_save_json_if_debugging; use indicatif::ParallelProgressIterator; -use log::{debug, info, trace, warn}; +use log::{ + debug, + info, + trace, + warn, +}; use rayon::prelude::*; +use serde::{ + Deserialize, + Serialize, +}; use timsrust::Frame; use super::aggregators::aggregate_clusters; use super::dbscan::runner::dbscan_label_clusters; +use crate::aggregation::dbscan::denseframe_dbscan::dbscan_denseframe; +use crate::ms::frames::frame_slice_rt_window::{ + FrameSliceWindow, + RawWeightedTimsPeakAggregator, +}; +use crate::ms::frames::{ + Converters, + DenseFrame, + DenseFrameWindow, + ExpandedFrameSlice, + FrameSlice, + MsMsFrameSliceWindowInfo, + TimsPeak, +}; +use crate::ms::tdf; +use crate::ms::tdf::DIAFrameInfo; +use crate::space::space_generics::{ + AsNDPointsAtIndex, + IntenseAtIndex, +}; +use crate::utils; +use crate::utils::maybe_save_json_if_debugging; // TODO I can probably split the 
ms1 and ms2 ... #[derive(Debug, Serialize, Deserialize, Clone, Copy)] @@ -95,8 +108,13 @@ fn _sanity_check_framestats( let peak_ratio = frame_stats_end.num_peaks as f64 / frame_stats_start.num_peaks as f64; trace!( - "Denoising frame {} with intensity ratio {:.2}, peak_ratio {:.2}, prior_max {}, curr_max {}", - frame_index, intensity_ratio, peak_ratio, frame_stats_start.max_intensity, frame_stats_end.max_intensity, + "Denoising frame {} with intensity ratio {:.2}, peak_ratio {:.2}, prior_max {}, curr_max \ + {}", + frame_index, + intensity_ratio, + peak_ratio, + frame_stats_start.max_intensity, + frame_stats_end.max_intensity, ); if frame_stats_end.max_intensity < frame_stats_start.max_intensity { trace!( diff --git a/src/aggregation/pseudospectra.rs b/src/aggregation/pseudospectra.rs index 849d354..9be748d 100644 --- a/src/aggregation/pseudospectra.rs +++ b/src/aggregation/pseudospectra.rs @@ -1,28 +1,33 @@ -use crate::aggregation::aggregators::ClusterAggregator; +use std::error::Error; +use std::io::Write; +use std::path::Path; -use crate::aggregation::dbscan::dbscan::{dbscan_aggregate, reassign_centroid}; +use log::info; +use rayon::prelude::*; +use serde::{ + Deserialize, + Serialize, +}; +use super::tracing::BaseTrace; +use crate::aggregation::aggregators::ClusterAggregator; +use crate::aggregation::dbscan::dbscan::{ + dbscan_aggregate, + reassign_centroid, +}; use crate::aggregation::queriable_collections::queriable_traces::{ - BaseTraceDistance, TraceScalings, + BaseTraceDistance, + TraceScalings, }; use crate::aggregation::queriable_collections::QueriableTraces; - -use crate::space::space_generics::{HasIntensity, NDPoint, NDPointConverter}; - +use crate::space::space_generics::{ + HasIntensity, + NDPoint, + NDPointConverter, +}; use crate::utils; use crate::utils::RollingSDCalculator; -use log::info; - -use rayon::prelude::*; - -use serde::{Deserialize, Serialize}; -use std::error::Error; -use std::io::Write; -use std::path::Path; - -use 
super::tracing::BaseTrace; - #[derive(Debug, Serialize, Deserialize, Clone, Copy)] pub struct PseudoscanGenerationConfig { pub rt_scaling: f32, diff --git a/src/aggregation/queriable_collections/queriable_indexed_points.rs b/src/aggregation/queriable_collections/queriable_indexed_points.rs index 710082d..af4f984 100644 --- a/src/aggregation/queriable_collections/queriable_indexed_points.rs +++ b/src/aggregation/queriable_collections/queriable_indexed_points.rs @@ -1,13 +1,17 @@ -pub use crate::{ - aggregation::tracing::TimeTimsPeak, - space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, - NDPoint, QueriableIndexedPoints, - }, - utils::binary_search_slice, -}; use rayon::prelude::*; +pub use crate::aggregation::tracing::TimeTimsPeak; +pub use crate::space::space_generics::{ + AsAggregableAtIndex, + AsNDPointsAtIndex, + DistantAtIndex, + IntenseAtIndex, + NDBoundary, + NDPoint, + QueriableIndexedPoints, +}; +pub use crate::utils::binary_search_slice; + #[derive(Debug)] pub struct TimeTimsPeakScaling { pub mz_scaling: f32, @@ -221,7 +225,8 @@ impl QueriableIndexedPoints<3> for QueriableTimeTimsPeaks { { debug_assert!( peak.rt >= rt_range.0 && peak.rt <= rt_range.1, - "RT out of range -> {} {} {}; istart {}, page_starrt {}, j {}; window rts: {:?}", + "RT out of range -> {} {} {}; istart {}, page_starrt {}, j {}; window rts: \ + {:?}", peak.rt, rt_range.0, rt_range.1, diff --git a/src/aggregation/queriable_collections/queriable_traces.rs b/src/aggregation/queriable_collections/queriable_traces.rs index fcddbde..f931f6e 100644 --- a/src/aggregation/queriable_collections/queriable_traces.rs +++ b/src/aggregation/queriable_collections/queriable_traces.rs @@ -1,15 +1,19 @@ -use crate::aggregation::tracing::BaseTrace; -pub use crate::{ - aggregation::tracing::TimeTimsPeak, - space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDBoundary, - NDPoint, QueriableIndexedPoints, 
- }, - utils::binary_search_slice, -}; use log::info; use rayon::prelude::*; +use crate::aggregation::tracing::BaseTrace; +pub use crate::aggregation::tracing::TimeTimsPeak; +pub use crate::space::space_generics::{ + AsAggregableAtIndex, + AsNDPointsAtIndex, + DistantAtIndex, + IntenseAtIndex, + NDBoundary, + NDPoint, + QueriableIndexedPoints, +}; +pub use crate::utils::binary_search_slice; + #[derive(Debug)] pub struct TraceScalings { pub rt_scaling: f64, diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index c44c639..d0fd27f 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -1,28 +1,45 @@ -use crate::aggregation::aggregators::{aggregate_clusters, ClusterAggregator}; -use crate::aggregation::chromatograms::{ - BTreeChromatogram, ChromatogramArray, NUM_LOCAL_CHROMATOGRAM_BINS, +use std::error::Error; +use std::io::Write; +use std::path::Path; + +use log::{ + debug, + info, + warn, +}; +use rayon::iter::IntoParallelIterator; +use rayon::prelude::*; +use serde::ser::{ + SerializeStruct, + Serializer, +}; +use serde::{ + Deserialize, + Serialize, }; +use crate::aggregation::aggregators::{ + aggregate_clusters, + ClusterAggregator, +}; +use crate::aggregation::chromatograms::{ + BTreeChromatogram, + ChromatogramArray, + NUM_LOCAL_CHROMATOGRAM_BINS, +}; use crate::aggregation::dbscan::runner::dbscan_label_clusters; use crate::aggregation::queriable_collections::queriable_indexed_points::{ - QueriableTimeTimsPeaks, TimeTimsPeakScaling, + QueriableTimeTimsPeaks, + TimeTimsPeakScaling, }; - use crate::ms::frames::DenseFrameWindow; -use crate::space::space_generics::{HasIntensity, TraceLike}; - +use crate::space::space_generics::{ + HasIntensity, + TraceLike, +}; use crate::utils; use crate::utils::RollingSDCalculator; -use log::{debug, info, warn}; -use rayon::iter::IntoParallelIterator; -use rayon::prelude::*; -use serde::ser::{SerializeStruct, Serializer}; -use serde::{Deserialize, Serialize}; -use std::error::Error; -use 
std::io::Write; -use std::path::Path; - type QuadLowHigh = (f64, f64); #[derive(Debug, Serialize, Deserialize, Clone, Copy)] diff --git a/src/main.rs b/src/main.rs index 7511263..fe0557c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,13 +17,17 @@ mod utils; extern crate log; extern crate pretty_env_logger; +use std::fs; +use std::path::Path; + use clap::Parser; use log::debug; +use serde::{ + Deserialize, + Serialize, +}; use crate::scoring::SageSearchConfig; -use serde::{Deserialize, Serialize}; -use std::fs; -use std::path::Path; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] diff --git a/src/ms/frames/dense_frame_window.rs b/src/ms/frames/dense_frame_window.rs index db6400a..9e38787 100644 --- a/src/ms/frames/dense_frame_window.rs +++ b/src/ms/frames/dense_frame_window.rs @@ -1,13 +1,23 @@ +use log::info; use serde::Serialize; -use timsrust::{ConvertableIndex, Frame, Scan2ImConverter, Tof2MzConverter}; - -use crate::ms::{ - frames::MsMsFrameSliceWindowInfo, - tdf::{DIAFrameInfo, ScanRange}, +use timsrust::{ + ConvertableIndex, + Frame, + Scan2ImConverter, + Tof2MzConverter, }; -use super::{frames::SortingOrder, DenseFrame, FrameSlice, TimsPeak}; -use log::info; +use super::frames::SortingOrder; +use super::{ + DenseFrame, + FrameSlice, + TimsPeak, +}; +use crate::ms::frames::MsMsFrameSliceWindowInfo; +use crate::ms::tdf::{ + DIAFrameInfo, + ScanRange, +}; pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); fn check_peak_sanity(peak: &TimsPeak) { diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 5855e8f..05b9f6e 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -1,15 +1,20 @@ -use serde::Serialize; use std::fmt; -use timsrust::{Frame, FrameType}; -use crate::{ - space::space_generics::{ - AsNDPointsAtIndex, IntenseAtIndex, NDBoundary, NDPoint, QueriableIndexedPoints, - }, - utils::binary_search_slice, +use serde::Serialize; +use timsrust::{ + 
Frame, + FrameType, }; use super::FrameMsMsWindowInfo; +use crate::space::space_generics::{ + AsNDPointsAtIndex, + IntenseAtIndex, + NDBoundary, + NDPoint, + QueriableIndexedPoints, +}; +use crate::utils::binary_search_slice; #[derive(Debug, Clone, Copy)] pub enum ScanNumberType { diff --git a/src/ms/frames/frame_slice_rt_window.rs b/src/ms/frames/frame_slice_rt_window.rs index ef881bc..825a81e 100644 --- a/src/ms/frames/frame_slice_rt_window.rs +++ b/src/ms/frames/frame_slice_rt_window.rs @@ -2,15 +2,19 @@ use log::trace; use serde::Serialize; use timsrust::ConvertableIndex; -use crate::{ - aggregation::aggregators::ClusterAggregator, - space::space_generics::{ - AsAggregableAtIndex, AsNDPointsAtIndex, DistantAtIndex, IntenseAtIndex, NDPoint, - QueriableIndexedPoints, - }, +use super::{ + ExpandedFrameSlice, + TimsPeak, +}; +use crate::aggregation::aggregators::ClusterAggregator; +use crate::space::space_generics::{ + AsAggregableAtIndex, + AsNDPointsAtIndex, + DistantAtIndex, + IntenseAtIndex, + NDPoint, + QueriableIndexedPoints, }; - -use super::{ExpandedFrameSlice, TimsPeak}; #[derive(Debug, Serialize)] pub struct FrameSliceWindow<'a> { @@ -68,9 +72,14 @@ impl FrameSliceWindow<'_> { let within_window_index = index - last_cum_length; if cfg!(debug_assertions) { - assert!(self.window[pos].intensities.len() > within_window_index, - "Index out of bounds, generated index: {}, within_window_index: {}, pos: {}, cum_lengths: {:?}", - index, within_window_index, pos, self.cum_lengths, + assert!( + self.window[pos].intensities.len() > within_window_index, + "Index out of bounds, generated index: {}, within_window_index: {}, pos: {}, \ + cum_lengths: {:?}", + index, + within_window_index, + pos, + self.cum_lengths, ); } diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs index 0bb455d..3d71cba 100644 --- a/src/ms/frames/frames.rs +++ b/src/ms/frames/frames.rs @@ -1,8 +1,12 @@ use serde::Serialize; -pub use timsrust::Frame; -pub use timsrust::FrameType; pub use 
timsrust::{ - ConvertableIndex, FileReader, Frame2RtConverter, Scan2ImConverter, Tof2MzConverter, + ConvertableIndex, + FileReader, + Frame, + Frame2RtConverter, + FrameType, + Scan2ImConverter, + Tof2MzConverter, }; use crate::space::space_generics::HasIntensity; diff --git a/src/ms/frames/mod.rs b/src/ms/frames/mod.rs index 13c348b..64a8db0 100644 --- a/src/ms/frames/mod.rs +++ b/src/ms/frames/mod.rs @@ -2,6 +2,17 @@ pub mod dense_frame_window; pub mod frame_slice; pub mod frame_slice_rt_window; pub mod frames; -pub use dense_frame_window::{Converters, DenseFrameWindow}; -pub use frame_slice::{ExpandedFrameSlice, FrameSlice, MsMsFrameSliceWindowInfo}; -pub use frames::{DenseFrame, FrameMsMsWindowInfo, TimsPeak}; +pub use dense_frame_window::{ + Converters, + DenseFrameWindow, +}; +pub use frame_slice::{ + ExpandedFrameSlice, + FrameSlice, + MsMsFrameSliceWindowInfo, +}; +pub use frames::{ + DenseFrame, + FrameMsMsWindowInfo, + TimsPeak, +}; diff --git a/src/ms/tdf.rs b/src/ms/tdf.rs index a08beb5..6fbfea2 100644 --- a/src/ms/tdf.rs +++ b/src/ms/tdf.rs @@ -1,13 +1,27 @@ -use log::{debug, info}; - -use sqlx::Pool; -use sqlx::{FromRow, Sqlite, SqlitePool}; use std::path::Path; -use timsrust::{ConvertableIndex, Frame}; + +use log::{ + debug, + info, +}; +use sqlx::{ + FromRow, + Pool, + Sqlite, + SqlitePool, +}; +use timsrust::{ + ConvertableIndex, + Frame, +}; use tokio; use tokio::runtime::Runtime; -use crate::ms::frames::{FrameMsMsWindowInfo, FrameSlice, MsMsFrameSliceWindowInfo}; +use crate::ms::frames::{ + FrameMsMsWindowInfo, + FrameSlice, + MsMsFrameSliceWindowInfo, +}; // Diaframemsmsinfo = vec of frame_id -> windowgroup_id // diaframemsmswindows = vec[(windowgroup_id, scanstart, scanend, iso_mz, iso_with, nce)] @@ -494,7 +508,10 @@ impl FrameInfoBuilder { ); GroupingLevel::WindowGroup } else { - log::info!("Less than 200 scan ranges detected, using QuadWindowGroup grouping level. 
(diaPASEF?)"); + log::info!( + "Less than 200 scan ranges detected, using QuadWindowGroup grouping level. \ + (diaPASEF?)" + ); GroupingLevel::QuadWindowGroup }; diff --git a/src/scoring.rs b/src/scoring.rs index edb8498..5d053f4 100644 --- a/src/scoring.rs +++ b/src/scoring.rs @@ -1,29 +1,39 @@ +use std::collections::HashMap; +use std::error::Error; +use std::fs; +use std::path::PathBuf; use std::str::FromStr; -use crate::aggregation::pseudospectra::PseudoSpectrum; use indicatif::ParallelProgressIterator; use log::warn; - -use sage_core::database::Parameters as SageDatabaseParameters; -use sage_core::database::{EnzymeBuilder, IndexedDatabase}; +use rayon::prelude::*; +use sage_core::database::{ + EnzymeBuilder, + IndexedDatabase, + Parameters as SageDatabaseParameters, +}; use sage_core::ion_series::Kind; use sage_core::mass::Tolerance; use sage_core::ml::linear_discriminant::score_psms; use sage_core::modification::ModificationSpecificity; -use sage_core::scoring::Feature; -use sage_core::scoring::Scorer; -use sage_core::spectrum::{Precursor, RawSpectrum, Representation, SpectrumProcessor}; +use sage_core::scoring::{ + Feature, + Scorer, +}; +use sage_core::spectrum::{ + Precursor, + RawSpectrum, + Representation, + SpectrumProcessor, +}; use serde::ser::SerializeStruct; -use serde::Deserialize; -use serde::Serialize; -use serde::Serializer; +use serde::{ + Deserialize, + Serialize, + Serializer, +}; -use std::collections::HashMap; -use std::error::Error; -use std::fs; -use std::path::PathBuf; - -use rayon::prelude::*; +use crate::aggregation::pseudospectra::PseudoSpectrum; const PCT_BP_KEEP: f64 = 0.001; @@ -312,7 +322,10 @@ pub fn score_pseudospectra( match out_path_features { None => {}, Some(out_path_features) => { - warn!("Writing features to features.csv ... and sebastian should delete this b4 publishing..."); + warn!( + "Writing features to features.csv ... and sebastian should delete this b4 \ + publishing..." 
+ ); let mut wtr = csv::Writer::from_path(out_path_features)?; for feat in &features { let s_feat = SerializableFeature::from_feature(feat, &db); diff --git a/src/space/kdtree.rs b/src/space/kdtree.rs index 0a79974..02fa3dc 100644 --- a/src/space/kdtree.rs +++ b/src/space/kdtree.rs @@ -1,6 +1,11 @@ -use crate::space::space_generics::{NDBoundary, NDPoint, QueriableIndexedPoints}; use log::warn; +use crate::space::space_generics::{ + NDBoundary, + NDPoint, + QueriableIndexedPoints, +}; + // Implements a kdtree with several minor differences. #[derive(Debug, Clone)] pub struct RadiusKDTree<'a, T, const DIMENSIONALITY: usize> { diff --git a/src/space/quad.rs b/src/space/quad.rs index 6dc8851..647cbc7 100644 --- a/src/space/quad.rs +++ b/src/space/quad.rs @@ -1,7 +1,13 @@ -use crate::space::space_generics::{NDBoundary, NDPoint, QueriableIndexedPoints}; use core::panic; + use log::trace; +use crate::space::space_generics::{ + NDBoundary, + NDPoint, + QueriableIndexedPoints, +}; + #[derive(Debug, Clone)] pub struct RadiusQuadTree<'a, T> { boundary: NDBoundary<2>, diff --git a/src/utils.rs b/src/utils.rs index 320df2c..5f43346 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,10 +1,17 @@ -use log::{debug, info, trace, warn}; -use num::cast::AsPrimitive; -use std::{ - cmp::Ordering, - fmt::Debug, - time::{Duration, Instant}, +use std::cmp::Ordering; +use std::fmt::Debug; +use std::time::{ + Duration, + Instant, +}; + +use log::{ + debug, + info, + trace, + warn, }; +use num::cast::AsPrimitive; pub struct ContextTimer { start: Instant, diff --git a/tests/test_window_parsing.rs b/tests/test_window_parsing.rs index 583601d..9df5bda 100644 --- a/tests/test_window_parsing.rs +++ b/tests/test_window_parsing.rs @@ -1,4 +1,7 @@ -use ionmesh::ms::tdf::{FrameInfoBuilder, GroupingLevel}; +use ionmesh::ms::tdf::{ + FrameInfoBuilder, + GroupingLevel, +}; #[test] fn test_dia_pasef() { From 6bf5b65e3517668b5c25a32ef5c1adf331dcb00e Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 25 Jul 2024 11:07:02 -0700 Subject: [PATCH 24/26] (chore) clippy nightly --- src/aggregation/queriable_collections/mod.rs | 1 - .../queriable_collections/queriable_traces.rs | 1 - src/ms/frames/frame_slice.rs | 7 ++----- src/ms/frames/frames.rs | 10 +--------- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/src/aggregation/queriable_collections/mod.rs b/src/aggregation/queriable_collections/mod.rs index 880d64c..4f13017 100644 --- a/src/aggregation/queriable_collections/mod.rs +++ b/src/aggregation/queriable_collections/mod.rs @@ -1,4 +1,3 @@ pub mod queriable_indexed_points; pub mod queriable_traces; -pub use queriable_indexed_points::QueriableIndexedPoints; pub use queriable_traces::QueriableTraces; diff --git a/src/aggregation/queriable_collections/queriable_traces.rs b/src/aggregation/queriable_collections/queriable_traces.rs index f931f6e..3a90211 100644 --- a/src/aggregation/queriable_collections/queriable_traces.rs +++ b/src/aggregation/queriable_collections/queriable_traces.rs @@ -2,7 +2,6 @@ use log::info; use rayon::prelude::*; use crate::aggregation::tracing::BaseTrace; -pub use crate::aggregation::tracing::TimeTimsPeak; pub use crate::space::space_generics::{ AsAggregableAtIndex, AsNDPointsAtIndex, diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 05b9f6e..48ba993 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -371,11 +371,8 @@ impl<'a> FrameSlice<'a> { tolerance, ); - match tmp { - Ok(Some(range_offset)) => { - ranges.ranges.push(range_offset); - }, - _ => (), + if let Ok(Some(range_offset)) = tmp { + ranges.ranges.push(range_offset); } } diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs index 3d71cba..13d7851 100644 --- a/src/ms/frames/frames.rs +++ b/src/ms/frames/frames.rs @@ -1,13 +1,5 @@ use serde::Serialize; -pub use timsrust::{ - ConvertableIndex, - FileReader, - Frame, - Frame2RtConverter, - FrameType, - Scan2ImConverter, - 
Tof2MzConverter, -}; +pub use timsrust::FrameType; use crate::space::space_generics::HasIntensity; From 0b581ec726f6492d15c0972a81839bdbd4fa4831 Mon Sep 17 00:00:00 2001 From: "J. Sebastian Paez" Date: Thu, 26 Sep 2024 11:02:30 -0700 Subject: [PATCH 25/26] (wip) updates timsrust to v0.4.1 (#9) * chore(timsrust)!: (wip) updated timsrust versioncargo * chore: cargo update and fmt * (chore) removed unused imports --- Cargo.lock | 1636 +++---------------- Cargo.toml | 35 +- deny.toml | 62 +- src/aggregation/dbscan/denseframe_dbscan.rs | 12 +- src/aggregation/ms_denoise.rs | 155 +- src/aggregation/tracing.rs | 39 +- src/main.rs | 30 +- src/ms/frames/dense_frame_window.rs | 83 +- src/ms/frames/frame_slice.rs | 197 ++- src/ms/frames/frame_slice_rt_window.rs | 10 +- src/ms/frames/frames.rs | 72 +- src/ms/frames/mod.rs | 3 +- src/ms/mod.rs | 1 - src/ms/tdf.rs | 567 ------- src/scoring.rs | 16 +- tests/test_window_parsing.rs | 69 - 16 files changed, 709 insertions(+), 2278 deletions(-) delete mode 100644 src/ms/tdf.rs delete mode 100644 tests/test_window_parsing.rs diff --git a/Cargo.lock b/Cargo.lock index 10ad791..542b15f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,19 +3,10 @@ version = 3 [[package]] -name = "addr2line" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "ahash" @@ -55,12 +46,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - [[package]] name = "android-tzdata" version = "0.1.1" @@ -78,9 +63,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.14" +version = "0.6.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ "anstyle", "anstyle-parse", @@ -93,33 +78,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -213,59 +198,18 @@ dependencies = [ "num", ] -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" -[[package]] -name = "backtrace" -version = "0.3.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - [[package]] name = "base64" version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - [[package]] name = "bitflags" version = "1.3.2" @@ -274,21 +218,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" -dependencies = [ - "serde", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] 
name = "brotli" @@ -319,9 +251,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" [[package]] name = "byteorder" @@ -331,19 +263,19 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "cc" -version = "1.0.98" +version = "1.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "07b1695e2c7e8fc85310cde85aeaab7e3097f593c91d209d3f9df76c928100f0" dependencies = [ "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -361,14 +293,14 @@ dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", - "windows-targets 0.52.5", + "windows-targets", ] [[package]] name = "clap" -version = "4.5.7" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" +checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" dependencies = [ "clap_builder", "clap_derive", @@ -376,9 +308,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.7" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" +checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" 
dependencies = [ "anstream", "anstyle", @@ -388,27 +320,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.5" +version = "4.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" +checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", ] [[package]] name = "clap_lex" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "colorchoice" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" [[package]] name = "console" @@ -423,12 +355,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "const-oid" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" - [[package]] name = "const-random" version = "0.1.18" @@ -449,45 +375,11 @@ dependencies = [ "tiny-keccak", ] -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - -[[package]] -name = "cpufeatures" -version = "0.2.12" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" -dependencies = [ - "libc", -] - -[[package]] -name = "crc" -version = "3.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" -dependencies = [ - "crc-catalog", -] - -[[package]] -name = "crc-catalog" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "crc32fast" @@ -517,15 +409,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.20" @@ -538,16 +421,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" version = "1.3.0" @@ -583,29 +456,6 @@ dependencies = [ "rayon", ] -[[package]] -name = "der" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" -dependencies = [ - "const-oid", - "pem-rfc7468", - "zeroize", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", - "subtle", -] - [[package]] name = "displaydoc" version = "0.1.7" @@ -617,20 +467,11 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "dotenvy" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" - [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" -dependencies = [ - "serde", -] +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" @@ -640,12 +481,12 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "env_logger" -version = "0.7.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" dependencies = [ - "atty", "humantime", + "is-terminal", "log", "regex", "termcolor", @@ -657,38 +498,11 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "etcetera" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" -dependencies = [ - "cfg-if", - "home", - "windows-sys 0.48.0", -] - 
-[[package]] -name = "event-listener" -version = "2.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" - [[package]] name = "fallible-iterator" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fallible-streaming-iterator" @@ -696,12 +510,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" - [[package]] name = "flatbuffers" version = "23.5.26" @@ -714,137 +522,20 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.30" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "miniz_oxide", ] -[[package]] -name = "flume" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" -dependencies = [ - "futures-core", - "futures-sink", - "spin 0.9.8", -] - [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-executor" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-intrusive" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" -dependencies = [ - "futures-core", - "lock_api", - "parking_lot", -] - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -856,12 +547,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "gimli" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" - [[package]] name = "half" version = "2.4.1" @@ -880,27 +565,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", - "allocator-api2", ] [[package]] name = "hashlink" -version = "0.8.4" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" dependencies = [ "hashbrown", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -dependencies = [ - "unicode-segmentation", -] - 
[[package]] name = "heck" version = "0.5.0" @@ -909,66 +584,21 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hkdf" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" -dependencies = [ - "hmac", -] - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - -[[package]] -name = "home" -version = "0.5.9" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] name = "humantime" -version = "1.3.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" -dependencies = [ - "quick-error", -] +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -987,21 +617,11 @@ dependencies = [ "cc", ] -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "indexmap" -version = "2.2.6" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown", @@ -1043,7 +663,6 @@ dependencies = [ "clap", "csv", "indicatif", - "libsqlite3-sys", "log", "num", "num-traits", @@ -1053,17 +672,26 @@ dependencies = [ "sage-core", "serde", "serde_json", - "sqlx", "timsrust", - "tokio", "toml", ] +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" -version = "1.70.0" +version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itertools" @@ -1082,30 +710,27 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -dependencies = [ - "spin 0.5.2", -] +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "lexical-core" @@ -1173,9 +798,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.159" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" [[package]] name = "libm" @@ -1185,9 +810,9 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "libsqlite3-sys" -version = "0.26.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" +checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" dependencies = [ "cc", "pkg-config", @@ -1204,12 +829,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - [[package]] name = 
"lock_api" version = "0.4.12" @@ -1222,97 +841,51 @@ dependencies = [ [[package]] name = "log" -version = "0.4.21" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "lz4" -version = "1.24.0" +version = "1.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9e2dd86df36ce760a60f6ff6ad526f7ba1f14ba0356f8254fb6905e6494df1" +checksum = "4d1febb2b4a79ddd1980eede06a8f7902197960aa0383ffcfdd62fe723036725" dependencies = [ - "libc", "lz4-sys", ] [[package]] name = "lz4-sys" -version = "1.9.4" +version = "1.11.1+lz4-1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" dependencies = [ "cc", "libc", ] -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - [[package]] name = "memchr" -version = "2.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.7.3" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" -dependencies = [ - "adler", -] +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] 
-name = "mio" -version = "0.8.11" +name = "memmap2" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", - "wasi", - "windows-sys 0.48.0", -] - -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", ] [[package]] -name = "nom" -version = "7.1.3" +name = "miniz_oxide" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" dependencies = [ - "memchr", - "minimal-lexical", + "adler2", ] [[package]] @@ -1331,29 +904,12 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.4" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ - "byteorder", - "lazy_static", - "libm", "num-integer", - "num-iter", "num-traits", - "rand", - "smallvec", - "zeroize", ] [[package]] @@ -1406,81 +962,18 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "number_prefix" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" -[[package]] -name = "object" -version = "0.36.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" -dependencies = [ - "memchr", -] - [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "openssl" -version = "0.10.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" -dependencies = [ - "bitflags 2.5.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - -[[package]] -name = "openssl-sys" -version = "0.9.103" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "ordered-float" version = "2.10.1" @@ -1490,16 +983,6 @@ dependencies = [ "num-traits", ] 
-[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - [[package]] name = "parking_lot_core" version = "0.9.10" @@ -1508,9 +991,9 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.1", + "redox_syscall", "smallvec", - "windows-targets 0.52.5", + "windows-targets", ] [[package]] @@ -1541,7 +1024,7 @@ dependencies = [ "snap", "thrift", "twox-hash", - "zstd", + "zstd 0.12.4", ] [[package]] @@ -1550,77 +1033,32 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "pem-rfc7468" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" -dependencies = [ - "base64ct", -] - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "pin-project-lite" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkcs1" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" -dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs8" -version = 
"0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "spki", -] - [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "d30538d42559de6b034bc76fd6dd4c38961b1ee5c6c56e3808c50128fdbc22ce" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "pretty_env_logger" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" dependencies = [ "env_logger", "log", @@ -1628,24 +1066,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] -[[package]] -name = "quick-error" -version = "1.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" - [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -1702,27 +1134,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.4.1" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +checksum = "355ae415ccd3a04315d3f8246e86d67689ea74d88d915576e1589a351062a13b" dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" -dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] name = "regex" -version = "1.10.4" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -1732,9 +1155,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -1743,37 +1166,17 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] 
-name = "rsa" -version = "0.9.6" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" -dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core", - "signature", - "spki", - "subtle", - "zeroize", -] +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rusqlite" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -1781,34 +1184,15 @@ dependencies = [ "smallvec", ] -[[package]] -name = "rustc-demangle" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" - [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags 2.5.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", -] - [[package]] name = "ryu" version = "1.0.18" @@ -1829,44 +1213,12 @@ dependencies = [ "serde", ] -[[package]] -name = "schannel" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "security-framework" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" -dependencies = [ - "bitflags 2.5.0", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "semver" version = "1.0.23" @@ -1881,84 +1233,50 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.202" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = 
"6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] [[package]] name = "serde_spanned" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" dependencies = [ "serde", ] [[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "signature" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" -dependencies = [ - "digest", - "rand_core", -] - -[[package]] -name = "slab" -version = "0.4.9" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "smallvec" @@ -1972,275 +1290,18 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" -[[package]] -name = "socket2" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" -dependencies = [ - "libc", - "windows-sys 
0.52.0", -] - -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - -[[package]] -name = "spki" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" -dependencies = [ - "base64ct", - "der", -] - -[[package]] -name = "sqlformat" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f895e3734318cc55f1fe66258926c9b910c124d47520339efecbb6c59cec7c1f" -dependencies = [ - "nom", - "unicode_categories", -] - -[[package]] -name = "sqlx" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e50c216e3624ec8e7ecd14c6a6a6370aad6ee5d8cfc3ab30b5162eeeef2ed33" -dependencies = [ - "sqlx-core", - "sqlx-macros", - "sqlx-mysql", - "sqlx-postgres", - "sqlx-sqlite", -] - -[[package]] -name = "sqlx-core" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d6753e460c998bbd4cd8c6f0ed9a64346fcca0723d6e75e52fdc351c5d2169d" -dependencies = [ - "ahash", - "atoi", - "byteorder", - "bytes", - "crc", - "crossbeam-queue", - "dotenvy", - "either", - "event-listener", - "futures-channel", - "futures-core", - "futures-intrusive", - "futures-io", - "futures-util", - "hashlink", - "hex", - "indexmap", - "log", - "memchr", - "native-tls", - "once_cell", - "paste", - "percent-encoding", - "serde", - "serde_json", - "sha2", - "smallvec", - "sqlformat", - "thiserror", - "tokio", - "tokio-stream", - "tracing", - "url", -] - -[[package]] -name = "sqlx-macros" -version = "0.7.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a793bb3ba331ec8359c1853bd39eed32cdd7baaf22c35ccf5c92a7e8d1189ec" -dependencies = [ - "proc-macro2", - "quote", - "sqlx-core", - "sqlx-macros-core", - "syn 1.0.109", -] - -[[package]] -name = "sqlx-macros-core" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4ee1e104e00dedb6aa5ffdd1343107b0a4702e862a84320ee7cc74782d96fc" -dependencies = [ - "dotenvy", - "either", - "heck 0.4.1", - "hex", - "once_cell", - "proc-macro2", - "quote", - "serde", - "serde_json", - "sha2", - "sqlx-core", - "sqlx-mysql", - "sqlx-sqlite", - "syn 1.0.109", - "tempfile", - "tokio", - "url", -] - -[[package]] -name = "sqlx-mysql" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864b869fdf56263f4c95c45483191ea0af340f9f3e3e7b4d57a61c7c87a970db" -dependencies = [ - "atoi", - "base64", - "bitflags 2.5.0", - "byteorder", - "bytes", - "crc", - "digest", - "dotenvy", - "either", - "futures-channel", - "futures-core", - "futures-io", - "futures-util", - "generic-array", - "hex", - "hkdf", - "hmac", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "percent-encoding", - "rand", - "rsa", - "serde", - "sha1", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror", - "tracing", - "whoami", -] - -[[package]] -name = "sqlx-postgres" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7ae0e6a97fb3ba33b23ac2671a5ce6e3cabe003f451abd5a56e7951d975624" -dependencies = [ - "atoi", - "base64", - "bitflags 2.5.0", - "byteorder", - "crc", - "dotenvy", - "etcetera", - "futures-channel", - "futures-core", - "futures-io", - "futures-util", - "hex", - "hkdf", - "hmac", - "home", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "rand", - "serde", - "serde_json", - "sha1", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror", - "tracing", - "whoami", -] - 
-[[package]] -name = "sqlx-sqlite" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59dc83cf45d89c555a577694534fcd1b55c545a816c816ce51f20bbe56a4f3f" -dependencies = [ - "atoi", - "flume", - "futures-channel", - "futures-core", - "futures-executor", - "futures-intrusive", - "futures-util", - "libsqlite3-sys", - "log", - "percent-encoding", - "serde", - "sqlx-core", - "tracing", - "url", -] - [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "stringprep" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" -dependencies = [ - "unicode-bidi", - "unicode-normalization", - "unicode-properties", -] - [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "subtle" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0208408ba0c3df17ed26eb06992cb1a1268d41b2c0e12e65203fbe3972cee5" - [[package]] name = "syn" version = "1.0.109" @@ -2254,27 +1315,15 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "tempfile" -version = "3.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" -dependencies = [ - "cfg-if", - "fastrand", - 
"rustix", - "windows-sys 0.52.0", -] - [[package]] name = "termcolor" version = "1.4.1" @@ -2286,22 +1335,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", ] [[package]] @@ -2317,18 +1366,20 @@ dependencies = [ [[package]] name = "timsrust" -version = "0.2.2" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0791ad8b3836b6a582b1bfb563c04d0e09acdaf85e45c16d3158a0bcb164b6" +checksum = "4e6cc90c026b287bcea09d4a68231ee81dc3ec773447894134ad226fff006a20" dependencies = [ "bytemuck", - "byteorder", "linreg", + "memmap2", "parquet", "rayon", "rusqlite", + "serde", + "serde_json", "thiserror", - "zstd", + "zstd 0.13.2", ] [[package]] @@ -2340,53 +1391,11 @@ dependencies = [ "crunchy", ] -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "tokio" -version = "1.38.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "pin-project-lite", - "socket2", - "windows-sys 0.48.0", -] - -[[package]] -name = "tokio-stream" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "toml" -version = "0.8.13" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", @@ -2396,18 +1405,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.13" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ "indexmap", "serde", @@ -2416,38 +1425,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "tracing" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" -dependencies = [ - "log", - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "tracing-core" -version = "0.1.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" -dependencies = [ - "once_cell", -] - [[package]] name = "twox-hash" version = "1.6.3" @@ -2458,67 +1435,17 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-properties" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" - -[[package]] -name = "unicode-segmentation" -version = "1.11.0" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-width" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" - -[[package]] -name = "unicode_categories" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" - -[[package]] -name = "url" -version = "2.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "utf8parse" @@ -2534,9 +1461,9 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" @@ -2544,42 +1471,37 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasite" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" - [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2587,80 +1509,39 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" - -[[package]] -name = "whoami" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" -dependencies = [ - "redox_syscall 0.4.1", - "wasite", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" +version = "0.2.93" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.5", -] - -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", + "windows-targets", ] [[package]] @@ -2669,172 +1550,128 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.5", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - 
"windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] name = "windows-targets" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.5", - "windows_aarch64_msvc 0.52.5", - "windows_i686_gnu 0.52.5", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.5", - "windows_x86_64_gnu 0.52.5", - "windows_x86_64_gnullvm 0.52.5", - "windows_x86_64_msvc 0.52.5", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" 
-version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" -version = "0.52.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.8" +version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" dependencies = [ "memchr", ] [[package]] name = "zerocopy" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.34" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.77", ] [[package]] -name = "zeroize" -version = "1.8.1" +name = "zstd" +version = "0.12.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe 6.0.6", +] [[package]] name = "zstd" -version = "0.12.4" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" dependencies = [ - "zstd-safe", + "zstd-safe 7.2.1", ] [[package]] @@ -2847,11 +1684,20 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 5717454..d3a6dee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,32 +7,37 @@ license = "Apache-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -timsrust = "= 0.2.2" +timsrust = "= 0.4.1" # Serialization -serde = { version = "1.0.193", features = ["derive"] } -serde_json = "1.0.108" -rayon = "1.8.0" -indicatif = {version = "*", features = ["rayon"]} -log = "0.4" -pretty_env_logger = "0.4" -num = "0.4.1" -num-traits = "0.2.18" -clap = {version = "4.4.17", features = ["derive"]} +serde = { version = "1.0.210", features = ["derive"] } +serde_json = "1.0.128" +clap = { version = "4.4.17", features = ["derive"] } csv = 
"1.3.0" + +# Random ... rand = "0.8.5" -sqlx = { version = "0.7.2", features = ["runtime-tokio-native-tls", "sqlite"]} -libsqlite3-sys = "^0.26.0" -tokio = {version="1.38.0", features = ["rt", "rt-multi-thread"]} +# Paralellization +rayon = "1.10.0" + +# Logging/ Messaging +indicatif = { version = "*", features = ["rayon"] } +log = "0.4.22" +pretty_env_logger = "0.5" + +# Q: Not sure if I still use this +num = "0.4.1" +num-traits = "0.2.19" # Sage sage-core = { git = "https://github.com/lazear/sage.git", rev = "9e870429889b341c4773df32b65e553283301a93" } -toml = "0.8.8" +toml = "0.8.19" [features] par_dataprep = [] -less_parallel = [] # Mostly for profiling reasons ... looking at flamegraphs is hard with rayon... +less_parallel = [ +] # Mostly for profiling reasons ... looking at flamegraphs is hard with rayon... [profile.release] lto = "thin" diff --git a/deny.toml b/deny.toml index 51e81af..ee0e475 100644 --- a/deny.toml +++ b/deny.toml @@ -20,17 +20,17 @@ # dependencies not shared by any other crates, would be ignored, as the target # list here is effectively saying which targets you are building for. 
targets = [ - { triple = "aarch64-apple-darwin" }, - { triple = "x86_64-apple-darwin" }, - { triple = "i686-pc-windows-gnu" }, - { triple = "i686-pc-windows-msvc" }, - { triple = "x86_64-pc-windows-gnu" }, - { triple = "x86_64-pc-windows-msvc" }, - { triple = "i686-unknown-linux-gnu" }, - { triple = "x86_64-unknown-linux-gnu" }, - { triple = "x86_64-unknown-linux-musl" }, - # { triple = "wasm32-unknown-unknown" }, - # { triple = "x86_64-unknown-redox" }, + { triple = "aarch64-apple-darwin" }, + { triple = "x86_64-apple-darwin" }, + { triple = "i686-pc-windows-gnu" }, + { triple = "i686-pc-windows-msvc" }, + { triple = "x86_64-pc-windows-gnu" }, + { triple = "x86_64-pc-windows-msvc" }, + { triple = "i686-unknown-linux-gnu" }, + { triple = "x86_64-unknown-linux-gnu" }, + { triple = "x86_64-unknown-linux-musl" }, + # { triple = "wasm32-unknown-unknown" }, + # { triple = "x86_64-unknown-redox" }, ] # When creating the dependency graph used as the source of truth when checks are # executed, this field can be used to prune crates from the graph, removing them @@ -73,8 +73,8 @@ yanked = "warn" # A list of advisory IDs to ignore. Note that ignored advisories will still # output a note when they are encountered. ignore = [ - #"RUSTSEC-0000-0000", - "RUSTSEC-2021-0145", # atty... potential unaligner when using custom allocators. + #"RUSTSEC-0000-0000", + "RUSTSEC-2023-0086", # Lexical-core -> arrow issue, updated in main in Sept-2024, unreleased. ] # Threshold for security vulnerabilities, any vulnerability with a CVSS score # lower than the range specified will be ignored. Note that ignored advisories @@ -101,13 +101,13 @@ version = 2 # See https://spdx.org/licenses/ for list of possible licenses # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. allow = [ - "MIT", - "Apache-2.0", - "Apache-2.0 WITH LLVM-exception", - "BSD-3-Clause", - "Unicode-DFS-2016", - "Zlib", - # Thank Jesus for rerun that compiled the following for me... 
+ "MIT", + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + "BSD-3-Clause", + "Unicode-DFS-2016", + "Zlib", + # Thank Jesus for rerun that compiled the following for me... ] # The confidence threshold for detecting a license from license text. @@ -146,8 +146,8 @@ license-files = [{ path = "LICENSE", hash = 0xbd0eed23 }] # and the crate will be checked normally, which may produce warnings or errors # depending on the rest of your configuration #license-files = [ - # Each entry is a crate relative path, and the (opaque) hash of its contents - #{ path = "LICENSE", hash = 0xbd0eed23 } +# Each entry is a crate relative path, and the (opaque) hash of its contents +#{ path = "LICENSE", hash = 0xbd0eed23 } #] [licenses.private] @@ -160,7 +160,7 @@ ignore = false # is only published to private registries, and ignore is true, the crate will # not have its license(s) checked registries = [ - #"https://sekretz.com/registry + #"https://sekretz.com/registry ] # This section is considered when running `cargo deny check bans`. @@ -187,17 +187,17 @@ workspace-default-features = "allow" external-default-features = "allow" # List of crates that are allowed. Use with care! allow = [ - #{ name = "ansi_term", version = "=0.11.0" }, + #{ name = "ansi_term", version = "=0.11.0" }, ] # List of crates to deny deny = [ - # Each entry the name of a crate and a version range. If version is - # not specified, all versions will be matched. - #{ name = "ansi_term", version = "=0.11.0" }, - # - # Wrapper crates can optionally be specified to allow the crate when it - # is a direct dependency of the otherwise banned crate - #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, + # Each entry the name of a crate and a version range. If version is + # not specified, all versions will be matched. 
+ #{ name = "ansi_term", version = "=0.11.0" }, + # + # Wrapper crates can optionally be specified to allow the crate when it + # is a direct dependency of the otherwise banned crate + #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, ] # List of features to allow/deny diff --git a/src/aggregation/dbscan/denseframe_dbscan.rs b/src/aggregation/dbscan/denseframe_dbscan.rs index 87feca7..a0be7a6 100644 --- a/src/aggregation/dbscan/denseframe_dbscan.rs +++ b/src/aggregation/dbscan/denseframe_dbscan.rs @@ -1,3 +1,5 @@ +use timsrust::MSLevel; + use crate::aggregation::aggregators::TimsPeakAggregator; use crate::aggregation::converters::{ BypassDenseFrameBackConverter, @@ -25,9 +27,12 @@ pub fn dbscan_denseframe( min_n: usize, min_intensity: u64, ) -> DenseFrame { - let out_frame_type: timsrust::FrameType = denseframe.frame_type; + let out_acq_type: timsrust::AcquisitionType = denseframe.acquisition_type; let out_rt: f64 = denseframe.rt; let out_index: usize = denseframe.index; + let out_ms_level: MSLevel = denseframe.ms_level; + let out_window_group_id = denseframe.window_group_id; + let out_correction_factor = denseframe.intensity_correction_factor; let prefiltered_peaks = { denseframe.sort_by_mz(); @@ -72,8 +77,11 @@ pub fn dbscan_denseframe( raw_peaks: peak_vec, index: out_index, rt: out_rt, - frame_type: out_frame_type, + acquisition_type: out_acq_type, sorted: None, + ms_level: out_ms_level, + window_group_id: out_window_group_id, + intensity_correction_factor: out_correction_factor, } } diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index e41125f..e82465d 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -1,5 +1,6 @@ use core::fmt::Debug; use core::panic; +use std::path::Path; use indicatif::ParallelProgressIterator; use log::{ @@ -13,7 +14,17 @@ use serde::{ Deserialize, Serialize, }; -use timsrust::Frame; +use timsrust::converters::{ + Scan2ImConverter, + Tof2MzConverter, +}; +use timsrust::{ 
+ AcquisitionType, + Frame, + MSLevel, + QuadrupoleSettings, + TimsRustError, +}; use super::aggregators::aggregate_clusters; use super::dbscan::runner::dbscan_label_clusters; @@ -28,11 +39,8 @@ use crate::ms::frames::{ DenseFrameWindow, ExpandedFrameSlice, FrameSlice, - MsMsFrameSliceWindowInfo, TimsPeak, }; -use crate::ms::tdf; -use crate::ms::tdf::DIAFrameInfo; use crate::space::space_generics::{ AsNDPointsAtIndex, IntenseAtIndex, @@ -167,9 +175,8 @@ fn _denoise_denseframe( fn denoise_frame_slice_window( frameslice_window: &[ExpandedFrameSlice], - ims_converter: &timsrust::Scan2ImConverter, - mz_converter: &timsrust::Tof2MzConverter, - _dia_frame_info: &DIAFrameInfo, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, min_n: usize, min_intensity: u64, _mz_scaling: f64, @@ -224,23 +231,7 @@ fn denoise_frame_slice_window( ); let ref_frame = &frameslice_window[frameslice_window.len() / 2]; - if ref_frame.slice_window_info.is_none() { - panic!("No slice window info found"); - } - - let slice_info = ref_frame.slice_window_info.as_ref().unwrap(); - let quad_group_id = match slice_info { - MsMsFrameSliceWindowInfo::WindowGroup(x) => *x, - MsMsFrameSliceWindowInfo::SingleWindow(x) => x.global_quad_row_id, - }; - let min_mz = match slice_info { - MsMsFrameSliceWindowInfo::WindowGroup(_x) => 0.0, - MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_start, - }; - let max_mz = match slice_info { - MsMsFrameSliceWindowInfo::WindowGroup(_x) => 0.0, - MsMsFrameSliceWindowInfo::SingleWindow(x) => x.mz_end, - }; + let quad_settings = ref_frame.quadrupole_settings.clone(); let mut raw_peaks: Vec = centroids .into_iter() @@ -266,15 +257,16 @@ fn denoise_frame_slice_window( raw_peaks, index: ref_frame.parent_frame_index, rt: ref_frame.rt, - frame_type: timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), + acquisition_type: ref_frame.acquisition_type, + ms_level: ref_frame.ms_level, sorted: None, + window_group_id: ref_frame.window_group_id, + 
intensity_correction_factor: ref_frame.intensity_correction_factor, }, ims_max: max_ims, ims_min: min_ims, - mz_start: min_mz as f64, - mz_end: max_mz as f64, - group_id: quad_group_id, - quad_group_id, + group_id: ref_frame.window_group_id.into(), + quadrupole_setting: quad_settings, }; maybe_save_json_if_debugging( &out, @@ -287,9 +279,8 @@ fn denoise_frame_slice_window( fn denoise_frame_slice( frame_window: &FrameSlice, - ims_converter: &timsrust::Scan2ImConverter, - mz_converter: &timsrust::Tof2MzConverter, - dia_frame_info: &DIAFrameInfo, + ims_converter: &Scan2ImConverter, + mz_converter: &Tof2MzConverter, min_n: usize, min_intensity: u64, mz_scaling: f64, @@ -297,12 +288,8 @@ fn denoise_frame_slice( ims_scaling: f32, max_ims_extension: f32, ) -> DenseFrameWindow { - let denseframe_window = DenseFrameWindow::from_frame_window( - frame_window, - ims_converter, - mz_converter, - dia_frame_info, - ); + let denseframe_window = + DenseFrameWindow::from_frame_window(frame_window, ims_converter, mz_converter); let denoised_frame = _denoise_denseframe( denseframe_window.frame, min_n, @@ -317,10 +304,8 @@ fn denoise_frame_slice( frame: denoised_frame, ims_min: denseframe_window.ims_min, ims_max: denseframe_window.ims_max, - mz_start: denseframe_window.mz_start, - mz_end: denseframe_window.mz_end, group_id: denseframe_window.group_id, - quad_group_id: denseframe_window.quad_group_id, + quadrupole_setting: denseframe_window.quadrupole_setting, } } @@ -365,8 +350,8 @@ struct FrameDenoiser { ims_scaling: f32, max_mz_extension: f64, max_ims_extension: f32, - ims_converter: timsrust::Scan2ImConverter, - mz_converter: timsrust::Tof2MzConverter, + ims_converter: Scan2ImConverter, + mz_converter: Tof2MzConverter, } impl<'a> Denoiser<'a, Frame, DenseFrame, Converters, Option> for FrameDenoiser { @@ -394,9 +379,8 @@ struct DIAFrameDenoiser { max_mz_extension: f64, ims_scaling: f32, max_ims_extension: f32, - dia_frame_info: DIAFrameInfo, - ims_converter: 
timsrust::Scan2ImConverter, - mz_converter: timsrust::Tof2MzConverter, + ims_converter: Scan2ImConverter, + mz_converter: Tof2MzConverter, } // impl DIAFrameDenoiser { @@ -433,7 +417,23 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> { info!("Denoising (centroiding) {} frames", elems.len()); - let mut frame_window_slices = self.dia_frame_info.split_frame_windows(&elems); + let mut flat_windows: Vec> = elems + .par_iter() + .flat_map(|x: &Frame| FrameSlice::from_frame(x)) + .collect(); + + flat_windows.par_sort_unstable_by(|a, b| { + a.quadrupole_settings + .partial_cmp(&b.quadrupole_settings) + .unwrap() + }); + + let mut break_points = vec![0]; + for i in 1..flat_windows.len() { + if flat_windows[i].quadrupole_settings != flat_windows[i - 1].quadrupole_settings { + break_points.push(i); + } + } // If profiling and having the "IONMESH_PROFILE_NUM_WINDOWS" env variable set // then only process the first N slices of windows. @@ -441,7 +441,8 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> if let Ok(num_windows) = std::env::var("IONMESH_PROFILE_NUM_WINDOWS") { let num_windows: usize = num_windows.parse().unwrap(); log::warn!("Profiling: Only processing {} windows", num_windows); - frame_window_slices.truncate(num_windows); + flat_windows.truncate(break_points[num_windows]); + break_points.truncate(num_windows); } // This warning reders to denoise_frame_slice_window. @@ -450,9 +451,12 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> // by timsrust ... 
warn!("Using prototype function for denoising, scalings are hard-coded"); - let mut out = Vec::with_capacity(frame_window_slices.len()); - let num_windows = frame_window_slices.len(); - for (i, sv) in frame_window_slices.iter().enumerate() { + let num_windows = break_points.len() - 1; + let mut out = Vec::with_capacity(num_windows); + let frame_window_slices: Vec<(usize, &[FrameSlice])> = (0..num_windows) + .map(|i| (i, &flat_windows[break_points[i]..break_points[i + 1]])) + .collect(); + for (i, sv) in frame_window_slices.iter() { info!("Denoising window {}/{}", i + 1, num_windows); let start_tot_peaks = sv.iter().map(|x| x.num_ndpoints() as u64).sum::(); let progbar = indicatif::ProgressBar::new(sv.len() as u64); @@ -462,7 +466,6 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> x, &self.ims_converter, &self.mz_converter, - &self.dia_frame_info, self.min_n, self.min_intensity, self.mz_scaling, @@ -481,7 +484,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> .map(lambda_denoise) .collect::>() } else { - sv.into_par_iter() + sv.par_iter() .map(|x| ExpandedFrameSlice::from_frame_slice(x)) .collect::>() .par_windows(3) @@ -518,18 +521,22 @@ pub fn read_all_ms1_denoising( max_mz_extension: f64, ims_scaling: f32, max_ims_extension: f32, -) -> Vec { - let reader = timsrust::FileReader::new(path).unwrap(); +) -> Result, TimsRustError> { + let metadata_path = Path::new(&path.clone()).join("analysis.tdf"); + let reader = timsrust::readers::FrameReader::new(path).unwrap(); + let metadata = timsrust::readers::MetadataReader::new(metadata_path).unwrap(); let mut timer = utils::ContextTimer::new("Reading all MS1 frames", true, utils::LogLevel::INFO); - let mut frames = reader.read_all_ms1_frames(); + let frames: Result, _> = reader.get_all_ms1().into_iter().collect(); timer.stop(true); - let ims_converter = reader.get_scan_converter().unwrap(); - let mz_converter = reader.get_tof_converter().unwrap(); + let mut frames = frames?; + + let ims_converter = 
metadata.im_converter; + let mz_converter = metadata.mz_converter; - frames.retain(|frame| matches!(frame.frame_type, timsrust::FrameType::MS1)); + frames.retain(|frame| matches!(frame.ms_level, MSLevel::MS1)); // let min_intensity = 100u64; // let min_n: usize = 3; @@ -548,29 +555,32 @@ pub fn read_all_ms1_denoising( utils::ContextTimer::new("Denoising all MS1 frames", true, utils::LogLevel::INFO); let out = ms1_denoiser.par_denoise_slice(frames); timer.stop(true); - out + Ok(out) } // This could probably be a macro ... pub fn read_all_dia_denoising( path: String, config: DenoiseConfig, -) -> (Vec>, DIAFrameInfo) { +) -> Result<(Vec>, Vec), TimsRustError> { let mut timer = utils::ContextTimer::new("Reading all DIA frames", true, utils::LogLevel::INFO); - let reader = timsrust::FileReader::new(path.clone()).unwrap(); + let metadata_path = Path::new(&path.clone()).join("analysis.tdf"); + let reader = timsrust::readers::FrameReader::new(path)?; + let metadata = timsrust::readers::MetadataReader::new(metadata_path.clone())?; + let quad_settings = timsrust::readers::QuadrupoleSettingsReader::new(metadata_path)?; - let dia_info = tdf::read_dia_frame_info(path.clone()).unwrap(); - let mut frames = reader.read_all_ms2_frames(); + let frames: Result, _> = reader.get_all_ms2().into_iter().collect(); - let ims_converter = reader.get_scan_converter().unwrap(); - let mz_converter = reader.get_tof_converter().unwrap(); + let ims_converter = metadata.im_converter; + let mz_converter = metadata.mz_converter; timer.stop(true); - frames.retain(|frame| { - matches!( - frame.frame_type, - timsrust::FrameType::MS2(timsrust::AcquisitionType::DIAPASEF) - ) + let mut frames = frames?; + + frames.retain(|frame| match (frame.ms_level, frame.acquisition_type) { + (MSLevel::MS2, AcquisitionType::DIAPASEF) => true, + (MSLevel::MS2, AcquisitionType::DiagonalDIAPASEF) => true, + _ => false, }); let denoiser = DIAFrameDenoiser { @@ -580,7 +590,6 @@ pub fn read_all_dia_denoising( 
max_mz_extension: config.max_mz_expansion_ratio.into(), ims_scaling: config.ims_scaling, max_ims_extension: config.max_ims_expansion_ratio, - dia_frame_info: dia_info.clone(), ims_converter, mz_converter, }; @@ -589,5 +598,5 @@ pub fn read_all_dia_denoising( let split_frames = denoiser.par_denoise_slice(frames); timer.stop(true); - (split_frames, dia_info) + Ok((split_frames, quad_settings)) } diff --git a/src/aggregation/tracing.rs b/src/aggregation/tracing.rs index d0fd27f..5aa22d0 100644 --- a/src/aggregation/tracing.rs +++ b/src/aggregation/tracing.rs @@ -227,10 +227,19 @@ impl TraceLike for BaseTrace { } } +pub fn calculate_cycle_time(frames: &[DenseFrameWindow]) -> f64 { + let rts = frames.iter().map(|x| x.frame.rt).collect::>(); + let rt_diffs = rts.windows(2).map(|x| x[1] - x[0]).collect::>(); + let cycle_time = rt_diffs.iter().sum::() / rt_diffs.len() as f64; + + assert!(cycle_time > 0.); + + cycle_time +} + pub fn combine_traces( grouped_denseframe_windows: Vec>, config: TracingConfig, - rt_binsize: f32, ) -> Vec> { // mz_scaling: f64, // rt_scaling: f64, @@ -241,12 +250,19 @@ pub fn combine_traces( let mut timer = utils::ContextTimer::new("Tracing peaks in time", true, utils::LogLevel::INFO); - let grouped_windows: Vec> = grouped_denseframe_windows + // rt_binsize: f32, + + let grouped_windows: Vec<(f64, Vec)> = grouped_denseframe_windows .into_iter() - .map(_flatten_denseframe_vec) + .map(|mut x| { + x.par_sort_unstable_by(|a, b| a.frame.rt.partial_cmp(&b.frame.rt).unwrap()); + let cycle_time = calculate_cycle_time(&x); + let o = _flatten_denseframe_vec(x); + (cycle_time, o) + }) .collect(); - let combine_lambda = |x: Vec| { + let combine_lambda = |cycle_time: f64, x: Vec| { combine_single_window_traces2( x, config.mz_scaling.into(), @@ -257,18 +273,21 @@ pub fn combine_traces( config.max_ims_expansion_ratio, config.min_n.into(), config.min_neighbor_intensity, - rt_binsize, + cycle_time as f32, ) }; // Combine the traces let out: Vec> = if cfg!(feature 
= "less_parallel") { warn!("Running in single-threaded mode"); - grouped_windows.into_iter().map(combine_lambda).collect() + grouped_windows + .into_iter() + .map(|x| combine_lambda(x.0, x.1)) + .collect() } else { grouped_windows .into_par_iter() - .map(combine_lambda) + .map(|x| combine_lambda(x.0, x.1)) .collect() }; @@ -381,12 +400,16 @@ fn _flatten_denseframe_vec(denseframe_windows: Vec) -> Vec Result<(), std::io::Error> { let args = Args::parse(); if args.write_template { @@ -90,7 +90,7 @@ fn main() { } else { std::fs::write(out_path.clone(), config_str).unwrap(); println!("Wrote default config to {}", out_path); - return; + return Ok(()); } } @@ -130,14 +130,22 @@ fn main() { .map(|path| out_path_dir.join(path).to_path_buf()); log::info!("Reading DIA data from: {}", path_use); - let (dia_frames, dia_info) = + let tmp = aggregation::ms_denoise::read_all_dia_denoising(path_use.clone(), config.denoise_config); - let cycle_time = dia_info.calculate_cycle_time(); + let (dia_frames, dia_info) = match tmp { + Ok(x) => x, + Err(e) => { + log::error!("Error reading DIA data: {:?}", e); + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + )); + }, + }; // TODO add here expansion limits - let mut traces = - aggregation::tracing::combine_traces(dia_frames, config.tracing_config, cycle_time); + let mut traces = aggregation::tracing::combine_traces(dia_frames, config.tracing_config); // let out = match out_traces_path { // Some(out_path) => aggregation::tracing::write_trace_csv(&traces, out_path), @@ -225,11 +233,7 @@ fn main() { config.sage_search_config, out_path_features.clone(), 1, - ); - match score_out { - Ok(_) => {}, - Err(e) => { - log::error!("Error scoring pseudospectra: {:?}", e); - }, - } + )?; + + Ok(()) } diff --git a/src/ms/frames/dense_frame_window.rs b/src/ms/frames/dense_frame_window.rs index 9e38787..f49938e 100644 --- a/src/ms/frames/dense_frame_window.rs +++ b/src/ms/frames/dense_frame_window.rs @@ -1,25 +1,24 @@ use 
log::info; use serde::Serialize; -use timsrust::{ - ConvertableIndex, - Frame, +use timsrust::converters::{ + ConvertableDomain, Scan2ImConverter, Tof2MzConverter, }; +use timsrust::{ + AcquisitionType, + Frame, +}; use super::frames::SortingOrder; use super::{ DenseFrame, FrameSlice, + SingleQuadrupoleSettings, TimsPeak, }; -use crate::ms::frames::MsMsFrameSliceWindowInfo; -use crate::ms::tdf::{ - DIAFrameInfo, - ScanRange, -}; -pub type Converters = (timsrust::Scan2ImConverter, timsrust::Tof2MzConverter); +pub type Converters = (Scan2ImConverter, Tof2MzConverter); fn check_peak_sanity(peak: &TimsPeak) { debug_assert!(peak.intensity > 0); debug_assert!(peak.mz > 0.); @@ -32,10 +31,8 @@ pub struct DenseFrameWindow { pub frame: DenseFrame, pub ims_min: f32, pub ims_max: f32, - pub mz_start: f64, - pub mz_end: f64, pub group_id: usize, - pub quad_group_id: usize, + pub quadrupole_setting: SingleQuadrupoleSettings, } impl DenseFrameWindow { @@ -43,57 +40,41 @@ impl DenseFrameWindow { frame_window: &FrameSlice, ims_converter: &Scan2ImConverter, mz_converter: &Tof2MzConverter, - dia_info: &DIAFrameInfo, ) -> DenseFrameWindow { - let (window_group_id, ww_quad_group_id, scan_start) = match frame_window.slice_window_info { - None => { - panic!("No window info") - // This branch points to an error in logic ... - // The window info should always be present in this context. - }, - Some(MsMsFrameSliceWindowInfo::WindowGroup(_)) => { + let window_group_id = frame_window.window_group_id; + let foo = match frame_window.acquisition_type { + AcquisitionType::DiagonalDIAPASEF => { // This branch should be easy to implement for things like synchro pasef... // Some details to iron out though ... 
panic!("Not implemented") }, - Some(MsMsFrameSliceWindowInfo::SingleWindow(ref x)) => { - let window_group_id = x.window_group_id; - let ww_quad_group_id = x.within_window_quad_group_id; - let scan_start = frame_window.scan_start; - (window_group_id, ww_quad_group_id, scan_start) + AcquisitionType::DIAPASEF => { + frame_window.quadrupole_settings.clone() + + // let window_group_id = frame_window.window_group_id; + // let ww_quad_group_id = x.within_window_quad_group_id; + // let scan_start = frame_window.scan_start; + // (window_group_id, ww_quad_group_id, scan_start) }, + _ => panic!("Not implemented"), }; // NOTE: I am swapping here the 'scan start' to be the `ims_end` because // the first scans have lower 1/k0 values. - let ims_max = ims_converter.convert(scan_start as u32) as f32; + let ims_max = ims_converter.convert(foo.scan_start as u32) as f32; let ims_min = - ims_converter.convert((frame_window.scan_offsets.len() + scan_start) as u32) as f32; + ims_converter.convert((frame_window.scan_offsets.len() + foo.scan_start) as u32) as f32; debug_assert!(ims_max <= ims_min); - let scan_range: Option<&ScanRange> = - dia_info.get_quad_windows(window_group_id, ww_quad_group_id); - let scan_range = match scan_range { - Some(x) => x, - None => { - panic!( - "No scan range for window_group_id: {}, within_window_quad_group_id: {}", - window_group_id, ww_quad_group_id - ); - }, - }; - let frame = DenseFrame::from_frame_window(frame_window, ims_converter, mz_converter); DenseFrameWindow { frame, ims_min, ims_max, - mz_start: scan_range.iso_low as f64, - mz_end: scan_range.iso_high as f64, - group_id: window_group_id, - quad_group_id: ww_quad_group_id, + group_id: window_group_id.into(), + quadrupole_setting: foo, } } } @@ -134,14 +115,18 @@ impl DenseFrame { let index = frame.index; let rt = frame.rt; - let frame_type = frame.frame_type; + let acquisition_type = frame.acquisition_type; + let ms_level = frame.ms_level; DenseFrame { raw_peaks: peaks, index, rt, - frame_type, 
+ acquisition_type, + ms_level, sorted: None, + intensity_correction_factor: frame.intensity_correction_factor, + window_group_id: frame.window_group, } } @@ -189,14 +174,18 @@ impl DenseFrame { let index = frame_window.parent_frame_index; let rt = frame_window.rt; - let frame_type = frame_window.frame_type; + let acquisition_type = frame_window.acquisition_type; + let ms_level = frame_window.ms_level; DenseFrame { raw_peaks: peaks, index, rt, - frame_type, + acquisition_type, + ms_level, sorted: None, + intensity_correction_factor: frame_window.intensity_correction_factor, + window_group_id: frame_window.window_group_id, } } diff --git a/src/ms/frames/frame_slice.rs b/src/ms/frames/frame_slice.rs index 48ba993..0e2b828 100644 --- a/src/ms/frames/frame_slice.rs +++ b/src/ms/frames/frame_slice.rs @@ -2,11 +2,12 @@ use std::fmt; use serde::Serialize; use timsrust::{ + AcquisitionType, Frame, - FrameType, + MSLevel, }; -use super::FrameMsMsWindowInfo; +use super::SingleQuadrupoleSettings; use crate::space::space_generics::{ AsNDPointsAtIndex, IntenseAtIndex, @@ -94,26 +95,32 @@ impl fmt::Display for ScanOutOfBoundsError { /// /// Renamed from the frame: /// - parent_frame_index 34 // renamed from Frame.index for clarity. +/// - window_group_id 4 // renamed from Frame.window_group for clarity. /// /// Additions for FrameSlice: /// - scan_start 123 // The scan number of the first scan offset in the current window. -/// - slice_window_info Some(MsMsFrameSliceWindowInfo::SingleWindow(FrameMsMsWindow)) #[derive(Debug, Clone, Serialize)] pub struct FrameSlice<'a> { - // pub scan_offsets: &'a [usize], // Timsrust changed this later ... 
- pub scan_offsets: &'a [u64], + pub scan_offsets: &'a [usize], pub tof_indices: &'a [u32], pub intensities: &'a [u32], pub parent_frame_index: usize, pub rt: f64, + pub window_group_id: u8, + pub intensity_correction_factor: f64, #[serde(skip)] - pub frame_type: FrameType, + pub ms_level: MSLevel, + + #[serde(skip)] + pub acquisition_type: AcquisitionType, + + #[serde(skip)] + pub quadrupole_settings: SingleQuadrupoleSettings, // From this point on they are local implementations // Before they are used from the timsrust crate. pub scan_start: usize, - pub slice_window_info: Option, } #[derive(Debug, Clone, Serialize)] @@ -123,19 +130,43 @@ pub struct ExpandedFrameSlice { pub intensities: Vec, pub parent_frame_index: usize, pub rt: f64, - pub slice_window_info: Option, + pub window_group_id: u8, + pub intensity_correction_factor: f64, + + #[serde(skip)] + pub quadrupole_settings: SingleQuadrupoleSettings, + + #[serde(skip)] + pub acquisition_type: AcquisitionType, #[serde(skip)] - pub frame_type: FrameType, + pub ms_level: MSLevel, } impl<'a> FrameSlice<'a> { - pub fn slice_frame( + pub fn from_frame(frame: &'a Frame) -> Vec> { + let quad_settings = frame.quadrupole_settings.clone(); + if quad_settings.len() == 0 { + panic!("Runtime error, at this stage at least one quadrupole setting is required"); + } + + let out = (0..quad_settings.len()) + .map(|i| { + let quad_info = + SingleQuadrupoleSettings::from_quad_settings(quad_settings.clone(), i); + FrameSlice::slice_frame(frame, quad_info) + }) + .collect::>(); + + out + } + + fn slice_frame( frame: &'a Frame, - scan_start: usize, - scan_end: usize, - slice_window_info: Option, + quad_info: SingleQuadrupoleSettings, ) -> FrameSlice<'a> { + let scan_start = quad_info.scan_start; + let scan_end = quad_info.scan_end; let scan_offsets = &frame.scan_offsets[scan_start..=scan_end]; let indprt_start = scan_offsets[0] as usize; @@ -151,7 +182,7 @@ impl<'a> FrameSlice<'a> { for i in 1..(scan_offsets.len() - 1) { 
debug_assert!(scan_offsets[i] <= scan_offsets[i + 1]); debug_assert!( - (scan_offsets[i + 1] - init_offset) <= tof_indices.len() as u64, + (scan_offsets[i + 1] - init_offset) <= tof_indices.len(), "scan_offsets[i+1]: {}, init_offset: {}, tof_indices.len(): {}", scan_offsets[i + 1], init_offset, @@ -166,9 +197,12 @@ impl<'a> FrameSlice<'a> { intensities, parent_frame_index: frame.index, rt: frame.rt, - frame_type: frame.frame_type, + ms_level: frame.ms_level, + acquisition_type: frame.acquisition_type, scan_start, - slice_window_info, + quadrupole_settings: quad_info, + window_group_id: frame.window_group, + intensity_correction_factor: frame.intensity_correction_factor, } } @@ -183,7 +217,7 @@ impl<'a> FrameSlice<'a> { local_index: usize, ) -> usize { debug_assert!(local_index < self.tof_indices.len()); - let search_val = self.scan_offsets[0] + local_index as u64; + let search_val = self.scan_offsets[0] + local_index; let loc = self .scan_offsets .binary_search_by(|x| x.partial_cmp(&search_val).unwrap()); @@ -399,8 +433,9 @@ impl ExpandedFrameSlice { pub fn from_frame_slice(frame_slice: &FrameSlice) -> ExpandedFrameSlice { let parent_frame_index = frame_slice.parent_frame_index; let rt = frame_slice.rt; - let slice_window_info = frame_slice.slice_window_info.clone(); - let frame_type = frame_slice.frame_type; + let quadrupole_settings = frame_slice.quadrupole_settings.clone(); + let acquisition_type = frame_slice.acquisition_type; + let ms_level = frame_slice.ms_level; let scan_numbers = frame_slice.explode_scan_numbers(); // Sort all arrays on the tof indices. 
@@ -430,8 +465,11 @@ impl ExpandedFrameSlice { intensities, parent_frame_index, rt, - slice_window_info, - frame_type, + quadrupole_settings, + acquisition_type, + ms_level, + window_group_id: frame_slice.window_group_id.clone(), + intensity_correction_factor: frame_slice.intensity_correction_factor.clone(), } } } @@ -439,37 +477,77 @@ impl ExpandedFrameSlice { // Tests for the FrameSlice #[cfg(test)] mod tests { + use std::sync::Arc; + + use timsrust::{ + MSLevel, + QuadrupoleSettings, + }; + use super::*; fn sample_frame() -> Frame { + let ms1_quad_settings = QuadrupoleSettings { + index: 0, + scan_starts: vec![], + scan_ends: vec![], + isolation_mz: vec![], + isolation_width: vec![], + collision_energy: vec![], + }; + let arc_ms1_quad_settings = Arc::new(ms1_quad_settings); Frame { index: 0, scan_offsets: vec![0, 0, 0, 0, 0, 3, 5, 6], tof_indices: vec![100, 101, 102, 10, 20, 30], intensities: vec![123, 111, 12, 3, 4, 1], rt: 65.34, - frame_type: FrameType::MS1, + ms_level: MSLevel::MS1, + acquisition_type: AcquisitionType::DIAPASEF, + quadrupole_settings: arc_ms1_quad_settings, + window_group: 0, + intensity_correction_factor: 1., } } #[test] fn test_frame_slice() { let frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 5, + isolation_mz: 100., + isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); assert_eq!(frame_slice.intensities, &[123, 111, 12]); assert_eq!(frame_slice.parent_frame_index, 0); assert_eq!(frame_slice.rt, 65.34); - assert_eq!(frame_slice.frame_type, FrameType::MS1); + assert_eq!(frame_slice.ms_level, MSLevel::MS1); assert_eq!(frame_slice.scan_start, 3); } #[test] fn test_global_scan_at_index() { let 
frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 5, + isolation_mz: 100., + isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); assert_eq!(frame_slice.global_scan_at_index(0), 4); @@ -482,7 +560,17 @@ mod tests { fn test_global_scan_at_index_oob_fails() { // these should fail ... test that it fails. let frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 5, + isolation_mz: 100., + isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); frame_slice.global_scan_at_index(3); } @@ -490,7 +578,17 @@ mod tests { #[test] fn test_explode_scan_numbers() { let frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 5, + isolation_mz: 100., + isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); assert_eq!(frame_slice.explode_scan_numbers(), vec![4, 4, 4]); @@ -499,7 +597,17 @@ mod tests { #[test] fn test_tof_intensities_at_scan() { let frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 5, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 5, + isolation_mz: 100., + 
isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.tof_indices, &[100, 101, 102]); assert_eq!(frame_slice.scan_offsets, &[0, 0, 3]); @@ -547,7 +655,17 @@ mod tests { #[test] fn test_tof_range_in_tolerance_at_scan() { let frame = sample_frame(); - let frame_slice = FrameSlice::slice_frame(&frame, 3, 7, None); + let quad_setting = SingleQuadrupoleSettings { + parent_index: 0, + scan_start: 3, + scan_end: 7, + isolation_mz: 100., + isolation_max: 100., + isolation_min: 100., + isolation_width: 25., + collision_energy: 10., + }; + let frame_slice = FrameSlice::slice_frame(&frame, quad_setting); assert_eq!(frame_slice.tof_indices, &[100, 101, 102, 10, 20, 30]); assert_eq!(frame_slice.scan_offsets, &[0, 0, 3, 5, 6]); @@ -617,13 +735,26 @@ mod tests { } fn sample_ms2_frame() -> Frame { + let ms2_quad_settings = QuadrupoleSettings { + index: 0, + scan_starts: vec![2, 6], + scan_ends: vec![5, 9], + isolation_mz: vec![100., 101.], + isolation_width: vec![25., 25.], + collision_energy: vec![10., 10.], + }; + let arc_ms2_quad_settings = Arc::new(ms2_quad_settings); Frame { index: 0, scan_offsets: vec![0, 0, 3, 5, 6], tof_indices: vec![100, 101, 102, 10, 20, 30], intensities: vec![123, 111, 12, 3, 4, 1], rt: 65.34, - frame_type: FrameType::MS2(timsrust::AcquisitionType::DIAPASEF), + ms_level: MSLevel::MS2, + acquisition_type: AcquisitionType::DIAPASEF, + quadrupole_settings: arc_ms2_quad_settings, + window_group: 1, + intensity_correction_factor: 1., } } } @@ -912,9 +1043,3 @@ impl RangeSet { false } } - -#[derive(Debug, Clone, Serialize)] -pub enum MsMsFrameSliceWindowInfo { - WindowGroup(usize), - SingleWindow(FrameMsMsWindowInfo), -} diff --git a/src/ms/frames/frame_slice_rt_window.rs b/src/ms/frames/frame_slice_rt_window.rs index 825a81e..d123558 100644 --- a/src/ms/frames/frame_slice_rt_window.rs +++ 
b/src/ms/frames/frame_slice_rt_window.rs @@ -1,6 +1,10 @@ use log::trace; use serde::Serialize; -use timsrust::ConvertableIndex; +use timsrust::converters::{ + ConvertableDomain, + Scan2ImConverter, + Tof2MzConverter, +}; use super::{ ExpandedFrameSlice, @@ -225,8 +229,8 @@ pub struct RawScaleTimsPeak { impl RawScaleTimsPeak { pub fn to_timspeak( &self, - mz_converter: &timsrust::Tof2MzConverter, - ims_converter: &timsrust::Scan2ImConverter, + mz_converter: &Tof2MzConverter, + ims_converter: &Scan2ImConverter, ) -> TimsPeak { TimsPeak { intensity: self.intensity as u32, diff --git a/src/ms/frames/frames.rs b/src/ms/frames/frames.rs index 13d7851..fa6351a 100644 --- a/src/ms/frames/frames.rs +++ b/src/ms/frames/frames.rs @@ -1,5 +1,11 @@ +use std::sync::Arc; + use serde::Serialize; -pub use timsrust::FrameType; +pub use timsrust::{ + AcquisitionType, + MSLevel, + QuadrupoleSettings, +}; use crate::space::space_generics::HasIntensity; @@ -56,23 +62,63 @@ pub struct DenseFrame { pub raw_peaks: Vec, pub index: usize, pub rt: f64, + pub window_group_id: u8, + pub intensity_correction_factor: f64, + + #[serde(skip_serializing)] + pub acquisition_type: AcquisitionType, #[serde(skip_serializing)] - pub frame_type: FrameType, + pub ms_level: MSLevel, #[serde(skip_serializing)] pub sorted: Option, } -/// Information on the context of a window in a frame. -/// -/// This adds to a frame slice the context of the what isolation was used -/// to generate the frame slice. 
-#[derive(Debug, Clone, Serialize)] -pub struct FrameMsMsWindowInfo { - pub mz_start: f32, - pub mz_end: f32, - pub window_group_id: usize, - pub within_window_quad_group_id: usize, - pub global_quad_row_id: usize, +#[derive(Debug, Clone, Serialize, PartialEq)] +pub struct SingleQuadrupoleSettings { + pub parent_index: usize, + pub scan_start: usize, + pub scan_end: usize, + pub isolation_mz: f64, + pub isolation_max: f64, + pub isolation_min: f64, + pub isolation_width: f64, + pub collision_energy: f64, +} + +impl SingleQuadrupoleSettings { + pub fn from_quad_settings( + quad_settings: Arc, + index: usize, + ) -> Self { + let isolation_mz = quad_settings.isolation_mz[index]; + let isolation_width = quad_settings.isolation_width[index]; + let isolation_max = isolation_mz + (isolation_width / 2.); + let isolation_min = isolation_mz - (isolation_width / 2.); + let collision_energy = quad_settings.collision_energy[index]; + + Self { + parent_index: quad_settings.index, + scan_start: quad_settings.scan_starts[index], + scan_end: quad_settings.scan_ends[index], + isolation_mz, + isolation_max, + isolation_min, + isolation_width, + collision_energy, + } + } +} + +impl PartialOrd for SingleQuadrupoleSettings { + fn partial_cmp( + &self, + other: &Self, + ) -> Option { + match self.parent_index.partial_cmp(&other.parent_index) { + Some(std::cmp::Ordering::Equal) => self.scan_start.partial_cmp(&other.scan_start), + x => x, + } + } } diff --git a/src/ms/frames/mod.rs b/src/ms/frames/mod.rs index 64a8db0..0f36625 100644 --- a/src/ms/frames/mod.rs +++ b/src/ms/frames/mod.rs @@ -9,10 +9,9 @@ pub use dense_frame_window::{ pub use frame_slice::{ ExpandedFrameSlice, FrameSlice, - MsMsFrameSliceWindowInfo, }; pub use frames::{ DenseFrame, - FrameMsMsWindowInfo, + SingleQuadrupoleSettings, TimsPeak, }; diff --git a/src/ms/mod.rs b/src/ms/mod.rs index 1e9820a..5f89a78 100644 --- a/src/ms/mod.rs +++ b/src/ms/mod.rs @@ -1,3 +1,2 @@ pub mod frames; pub mod sorting; -pub mod tdf; diff 
--git a/src/ms/tdf.rs b/src/ms/tdf.rs deleted file mode 100644 index 6fbfea2..0000000 --- a/src/ms/tdf.rs +++ /dev/null @@ -1,567 +0,0 @@ -use std::path::Path; - -use log::{ - debug, - info, -}; -use sqlx::{ - FromRow, - Pool, - Sqlite, - SqlitePool, -}; -use timsrust::{ - ConvertableIndex, - Frame, -}; -use tokio; -use tokio::runtime::Runtime; - -use crate::ms::frames::{ - FrameMsMsWindowInfo, - FrameSlice, - MsMsFrameSliceWindowInfo, -}; - -// Diaframemsmsinfo = vec of frame_id -> windowgroup_id -// diaframemsmswindows = vec[(windowgroup_id, scanstart, scanend, iso_mz, iso_with, nce)] - -#[derive(Debug, Clone)] -pub struct ScanRange { - pub row_id: usize, - pub scan_start: usize, - pub scan_end: usize, - pub iso_mz: f32, - pub iso_width: f32, - pub nce: f32, - pub ims_start: f32, - pub ims_end: f32, - pub iso_low: f32, - pub iso_high: f32, - pub window_group_id: usize, - pub within_window_quad_group_id: usize, -} - -impl ScanRange { - pub fn new( - row_id: usize, - window_group_id: usize, - within_window_quad_group_id: usize, - scan_start: usize, - scan_end: usize, - iso_mz: f32, - iso_width: f32, - nce: f32, - scan_converter: &timsrust::Scan2ImConverter, - ) -> Self { - // Note that here I swap the start and end, - // because lower scan numbers are actually - // higher 1/k0 values. ... i think... 
- let ims_end = scan_converter.convert(scan_start as u32); - let ims_start = scan_converter.convert(scan_end as u32); - - debug_assert!(ims_start < ims_end); - let iso_low = iso_mz - iso_width / 2.0; - let iso_high = iso_mz + iso_width / 2.0; - - Self { - row_id, - scan_start, - scan_end, - iso_mz, - iso_width, - nce, - ims_start: ims_start as f32, - ims_end: ims_end as f32, - iso_low, - iso_high, - window_group_id, - within_window_quad_group_id, - } - } -} - -impl From for FrameMsMsWindowInfo { - fn from(val: ScanRange) -> Self { - FrameMsMsWindowInfo { - mz_start: val.iso_low, - mz_end: val.iso_high, - window_group_id: val.window_group_id, - within_window_quad_group_id: val.within_window_quad_group_id, - global_quad_row_id: val.row_id, - } - } -} - -#[derive(Debug, Clone)] -pub struct DIAWindowGroup { - pub window_group_id: usize, - pub scan_ranges: Vec, -} - -#[derive(Debug, Clone)] -pub enum GroupingLevel { - WindowGroup, // Technically this is the same as the frame level ... - QuadWindowGroup, -} - -#[derive(Debug, Clone)] -pub struct DIAFrameInfo { - pub groups: Vec>, - /// Frame Groups is a vec of length equal to the number of frames. - /// Each element is an Option that is the index of the group - /// that the frame belongs to. - pub frame_groups: Vec>, - pub retention_times: Vec>, - pub grouping_level: GroupingLevel, - pub number_of_groups: usize, - - /// The row to group is meant to map the `Isolation window row id` - /// to the grouping level it will have... for diaPASEF, since every - /// scan range has a different quand window, the number of distinct - /// groups is the number of scan ranges (window groups+scan range - /// combinations). For the case of diagonal PASEF, the number of - /// groups is the number of window groups, since the scan ranges - /// are not independent from each other. - pub row_to_group: Vec, -} - -// TODO rename or split this ... since it is becoming more -// of a splitter than a frame info reader. 
-// Maybe a builder -> splitter pattern? -impl DIAFrameInfo { - pub fn get_dia_frame_window_group( - &self, - frame_id: usize, - ) -> Option<&DIAWindowGroup> { - let group_id = self.frame_groups[frame_id]; - match group_id { - None => None, - Some(group_id) => self.groups[group_id].as_ref(), - } - } - - async fn rts_from_tdf_connection(conn: &Pool) -> Result>, sqlx::Error> { - // To calculate cycle time -> - // DiaFrameMsMsInfo -> Get the frames that match a specific id (one for each ...) - // Frames -> SELECT id, time FROM Frames -> make a Vec>, map the former - // framer id list (no value should be None). - // Scan diff the new vec! - let results: Vec<(i32, f32)> = sqlx::query_as("SELECT Id, Time FROM Frames") - .fetch_all(conn) - .await?; - let mut retention_times = Vec::new(); - for row in results.iter() { - let id: usize = row.0 as usize; - let time: f32 = row.1; - retention_times.resize(id + 1, None); - retention_times[id] = Some(time); - } - Ok(retention_times) - } - - pub fn calculate_cycle_time(&self) -> f32 { - let mut group_cycle_times = Vec::new(); - - for (i, group) in self.groups.iter().enumerate() { - if group.is_none() { - continue; - } - - let mapping_frames: Vec = self - .frame_groups - .iter() - .enumerate() - .filter(|(_, group_id)| { - if group_id.is_none() { - return false; - } - let group_id = group_id.unwrap(); - group_id == i - }) - .map(|(frame_id, _group_id)| frame_id) - .collect(); - - let local_times = mapping_frames - .iter() - .map(|frame_id| self.retention_times[*frame_id].unwrap()) - .scan(0.0, |acc, x| { - let out = x - *acc; - *acc = x; - Some(out) - }) - .collect::>(); - - let cycle_time = local_times.iter().sum::() / local_times.len() as f32; - group_cycle_times.push(cycle_time); - } - - debug!("Group cycle times: {:?}", group_cycle_times); - let avg_cycle_time = group_cycle_times.iter().sum::() / group_cycle_times.len() as f32; - avg_cycle_time - } - - pub fn split_frame<'a, 'b>( - &'b self, - frame: &'a Frame, - window_group: 
&DIAWindowGroup, - ) -> Result, &'static str> - where - 'a: 'b, - { - let mut out_frames = Vec::new(); - for scan_range in window_group.scan_ranges.iter() { - let slice_w_info: MsMsFrameSliceWindowInfo = - MsMsFrameSliceWindowInfo::SingleWindow(scan_range.clone().into()); - let frame_slice = FrameSlice::slice_frame( - frame, - scan_range.scan_start, - scan_range.scan_end, - Some(slice_w_info), - ); - out_frames.push(frame_slice); - } - - Ok(out_frames) - } - - pub fn split_frame_windows<'a>( - &'a self, - frames: &'a [Frame], - ) -> Vec> { - let mut out = Vec::new(); - - match self.grouping_level { - GroupingLevel::WindowGroup => { - for _ in 0..(self.groups.len() + 1) { - out.push(Vec::new()); - } - }, - GroupingLevel::QuadWindowGroup => { - for _ in 0..(self.row_to_group.len() + 1) { - out.push(Vec::new()); - } - }, - } - - for frame in frames { - let group = self - .get_dia_frame_window_group(frame.index) - .expect("Frame is not in MS2 frames"); - - match self.grouping_level { - GroupingLevel::WindowGroup => { - panic!("WindowGroup grouping level not implemented for splitting frames") - //out[group.id].push(frame_window); - }, - GroupingLevel::QuadWindowGroup => { - let frame_windows = self - .split_frame(frame, group) - .expect("Error splitting frame"); - for frame_window in frame_windows { - match &frame_window.slice_window_info { - None => { - panic!("Frame window has no slice window info") - }, - Some(MsMsFrameSliceWindowInfo::SingleWindow(scan_range)) => { - out[scan_range.global_quad_row_id].push(frame_window); - }, - Some(MsMsFrameSliceWindowInfo::WindowGroup(group)) => { - out[*group].push(frame_window); - }, - } - } - }, - } - } - - // Sort by ascending rt - for group in out.iter_mut() { - group.sort_by(|a, b| a.rt.partial_cmp(&b.rt).unwrap()); - } - out.retain(|x| !x.is_empty()); - - // Debug assert that the frames are sorted by rt - if cfg!(debug_assertions) { - for group in out.iter() { - for i in 0..(group.len() - 1) { - debug_assert!(group[i].rt <= 
group[i + 1].rt); - } - } - } - - out - } - - pub fn get_quad_windows( - &self, - scan_group_id: usize, - quad_group_id: usize, - ) -> Option<&ScanRange> { - let group = self.groups[scan_group_id].as_ref(); - let group = match group { - None => { - panic!( - "Group not found for scan group id: {}, in groups n={}", - scan_group_id, - self.groups.len() - ) - }, - Some(group) => group, - }; - - let quad_group = group.scan_ranges.get(quad_group_id); - let quad_group = match quad_group { - None => { - panic!( - "Quad group not found for quad group id: {}, in scan_ranges {:?}", - quad_group_id, group.scan_ranges - ) - }, - Some(quad_group) => quad_group, - }; - - Some(quad_group) - } -} - -// Reference for the tables: - -// CREATE TABLE DiaFrameMsMsInfo ( -// Frame INTEGER PRIMARY KEY, -// WindowGroup INTEGER NOT NULL, -// FOREIGN KEY (Frame) REFERENCES Frames (Id), -// FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) -// ) - -// CREATE TABLE DiaFrameMsMsWindows ( -// WindowGroup INTEGER NOT NULL, -// ScanNumBegin INTEGER NOT NULL, -// ScanNumEnd INTEGER NOT NULL, -// IsolationMz REAL NOT NULL, -// IsolationWidth REAL NOT NULL, -// CollisionEnergy REAL NOT NULL, -// PRIMARY KEY(WindowGroup, ScanNumBegin), -// FOREIGN KEY (WindowGroup) REFERENCES DiaFrameMsMsWindowGroups (Id) -// ) WITHOUT ROWID - -#[derive(Clone, FromRow, Debug)] -pub struct DiaFrameMsMsWindowInfo { - #[sqlx(rename = "WindowGroup")] - pub window_group: i32, - #[sqlx(rename = "ScanNumBegin")] - pub scan_num_begin: i32, - #[sqlx(rename = "ScanNumEnd")] - pub scan_num_end: i32, - #[sqlx(rename = "IsolationMz")] - pub isolation_mz: f32, - #[sqlx(rename = "IsolationWidth")] - pub isolation_width: f32, - #[sqlx(rename = "CollisionEnergy")] - pub collision_energy: f32, -} - -impl DiaFrameMsMsWindowInfo { - fn into_scan_range( - &self, - id: usize, - quad_id: usize, - scan_converter: &timsrust::Scan2ImConverter, - ) -> ScanRange { - ScanRange::new( - id, - self.window_group as usize, - quad_id, 
- self.scan_num_begin as usize, - self.scan_num_end as usize, - self.isolation_mz, - self.isolation_width, - self.collision_energy, - scan_converter, - ) - } -} - -#[derive(Debug)] -pub struct FrameInfoBuilder { - pub tdf_path: String, - pub scan_converter: timsrust::Scan2ImConverter, -} - -impl FrameInfoBuilder { - pub fn from_dotd_path(dotd_path: String) -> Self { - let reader = timsrust::FileReader::new(dotd_path.clone()).unwrap(); - let scan_converter = reader.get_scan_converter().unwrap(); - - // Find an 'analysis.tdf' file inside the dotd file (directory). - let tdf_path = Path::new(dotd_path.as_str()) - .join("analysis.tdf") - .into_os_string() - .into_string() - .unwrap(); - info!("tdf_path: {:?}", tdf_path); - Self { - tdf_path, - scan_converter, - } - } - - pub fn build(&self) -> Result { - let rt = Runtime::new().unwrap(); - - rt.block_on(async { self.build_async().await }) - } - - async fn build_async(&self) -> Result { - let db = SqlitePool::connect(&self.tdf_path).await?; - - // This vec maps frame_id -> window_group_id - let frame_info = self.get_frame_mapping(&db).await?; - - // This vec maps window_group_id -> Vec - // And also returns the grouping level. 
- let (group_mapping, grouping_level, row_to_group) = self.get_frame_windows(&db).await?; - let number_of_groups = row_to_group.iter().max().unwrap() + 1; - - debug!("Number of groups: {}", number_of_groups); - - let max_window_id = group_mapping.len() - 1; - - let mut groups_vec_o = (0..(max_window_id + 1)).map(|_| None).collect::>(); - for (i, scan_ranges) in group_mapping.into_iter().enumerate() { - let scan_ranges = match scan_ranges { - None => continue, - Some(scan_ranges) => scan_ranges, - }; - debug!("Scan ranges i={}: {:?}", i, scan_ranges); - if cfg!(debug_assertions) { - for scan_range in scan_ranges.iter() { - debug_assert!(scan_range.window_group_id == i) - } - }; - if scan_ranges.is_empty() { - continue; - } else { - groups_vec_o[i] = Some(DIAWindowGroup { - window_group_id: i, - scan_ranges, - }); - } - } - - let frame_info = DIAFrameInfo { - groups: groups_vec_o, - frame_groups: frame_info, - retention_times: DIAFrameInfo::rts_from_tdf_connection(&db).await?, - grouping_level, - number_of_groups, - row_to_group, - }; - - Ok(frame_info) - } - - async fn get_frame_mapping( - &self, - db: &Pool, - ) -> Result>, sqlx::Error> { - let result: Vec<(i32, i32)> = - sqlx::query_as("SELECT Frame, WindowGroup FROM DiaFrameMsMsInfo;") - .fetch_all(db) - .await?; - - let frame_info = result - .iter() - .map(|(id, group)| (*id as usize, *group as usize)) - .collect::>(); - - let max_id = frame_info.iter().map(|(id, _)| id).max().unwrap(); - let mut ids_map_vec = vec![None; max_id + 1]; - for (id, group) in frame_info { - ids_map_vec[id] = Some(group); - } - - Ok(ids_map_vec) - } - - async fn get_frame_windows( - &self, - db: &Pool, - ) -> Result<(Vec>>, GroupingLevel, Vec), sqlx::Error> { - let result: Vec = sqlx::query_as::<_, DiaFrameMsMsWindowInfo>( - "SELECT - WindowGroup, - ScanNumBegin, - ScanNumEnd, - IsolationMz, - IsolationWidth, - CollisionEnergy - FROM DiaFrameMsMsWindows", - ) - .fetch_all(db) - .await - .unwrap(); - - let grouping_level = if 
result.len() > 200 { - log::info!( - "More than 200 scan ranges, using WindowGroup grouping level. (diagonal PASEF?)" - ); - GroupingLevel::WindowGroup - } else { - log::info!( - "Less than 200 scan ranges detected, using QuadWindowGroup grouping level. \ - (diaPASEF?)" - ); - GroupingLevel::QuadWindowGroup - }; - - let max_window_id: usize = result - .iter() - .map(|window| window.window_group) - .max() - .unwrap() as usize; - - let mut group_map_vec: Vec>> = vec![None; max_window_id + 1]; - - let mut scangroup_id = 0; - let mut row_to_group = Vec::new(); - for window in result { - // TODO this is maybe a good place to make the trouping ... - // If its diapasef, the groups are quad+window groups. - // If its diagonal, the groups are only window groups. - let usize_wg = window.window_group as usize; - if group_map_vec[usize_wg].is_none() { - group_map_vec[usize_wg] = Some(Vec::new()); - } - - match &mut group_map_vec[usize_wg] { - None => continue, - Some(scan_ranges) => { - let quad_id = scan_ranges.len(); - scan_ranges.push(window.into_scan_range( - scangroup_id, - quad_id, - &self.scan_converter, - )); - scangroup_id += 1; - }, - } - - match grouping_level { - GroupingLevel::WindowGroup => { - row_to_group.push(usize_wg); - }, - GroupingLevel::QuadWindowGroup => { - row_to_group.push(scangroup_id); - }, - } - } - Ok((group_map_vec, grouping_level, row_to_group)) - } -} - -// TODO refactor this to make it a constructor method ... -pub fn read_dia_frame_info(dotd_file: String) -> Result { - let builder = FrameInfoBuilder::from_dotd_path(dotd_file); - builder.build() -} diff --git a/src/scoring.rs b/src/scoring.rs index 5d053f4..ebbc807 100644 --- a/src/scoring.rs +++ b/src/scoring.rs @@ -194,7 +194,7 @@ pub fn score_pseudospectra( config: SageSearchConfig, out_path_features: Option, num_report_psms: usize, -) -> Result, Box> { +) -> Result, std::io::Error> { // 1. 
Buid raw spectra from the pseudospectra let take_top_n = 250; @@ -256,8 +256,18 @@ pub fn score_pseudospectra( config.fasta_path.clone(), parameters.decoy_tag.clone(), parameters.generate_decoys, - ) - .expect("Error reading fasta"); + ); + + let sage_fasta = match sage_fasta { + Ok(x) => x, + Err(e) => { + log::error!("Error reading fasta: {:?}", e); + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + e.to_string(), + )); + }, + }; let db = parameters.clone().build(sage_fasta); diff --git a/tests/test_window_parsing.rs b/tests/test_window_parsing.rs deleted file mode 100644 index 9df5bda..0000000 --- a/tests/test_window_parsing.rs +++ /dev/null @@ -1,69 +0,0 @@ -use ionmesh::ms::tdf::{ - FrameInfoBuilder, - GroupingLevel, -}; - -#[test] -fn test_dia_pasef() { - let finfo_b = FrameInfoBuilder::from_dotd_path("tests/data/diapasef_tdf/data.d".into()); - let finfo = finfo_b.build(); - - assert!(finfo.is_ok()); - - let finfo = finfo.unwrap(); - - // The number of ids in `DiaFrameMsMsWindowGroups` + 1 bc 0 is not used - assert_eq!(finfo.groups.len(), 9); - - assert!(finfo.groups[0].is_none()); - for group in finfo.groups.iter().skip(1) { - assert!(group.is_some()); - } - - // Make sure the grouping is correctly assigned... for diaPASEF it should - // be `QuadWindowGroup` - match finfo.grouping_level { - GroupingLevel::QuadWindowGroup => {}, - GroupingLevel::WindowGroup => { - assert!(false); - }, - } - - // Make sure the grouping is correct. 
- // For this diapasef file is 8 * 2 (8 window groups, 2 isolation windows per group) - assert_eq!(finfo.row_to_group.iter().max().unwrap(), &(8 * 2)); - - // println!("{:?}", finfo); - // assert!(false) -} - -#[test] -fn test_synchro_dia_pasef() { - let finfo_b = FrameInfoBuilder::from_dotd_path("tests/data/synchropasef_tdf/data.d".into()); - let finfo = finfo_b.build(); - - assert!(finfo.is_ok()); - - let finfo = finfo.unwrap(); - - // The number of ids in `DiaFrameMsMsWindowGroups` + 1, bc 0 is not used - assert_eq!(finfo.groups.len(), 5); - assert!(finfo.groups[0].is_none()); - for group in finfo.groups.iter().skip(1) { - assert!(group.is_some()); - } - - // Make sure the grouping is correctly assigned... for diaPASEF it should - // be `QuadWindowGroup` - match finfo.grouping_level { - GroupingLevel::QuadWindowGroup => { - assert!(false); - }, - GroupingLevel::WindowGroup => {}, - } - - // Make sure the grouping is correct. - assert_eq!(finfo.row_to_group.iter().max().unwrap(), &4); - - // println!("{:?}", finfo); -} From cc84dab47f001ff3dda63f574c1152c47c2ca9d8 Mon Sep 17 00:00:00 2001 From: "J. 
Sebastian Paez" Date: Thu, 26 Sep 2024 13:30:01 -0700 Subject: [PATCH 26/26] (chore) version bump --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/aggregation/ms_denoise.rs | 20 +++++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 542b15f..887387a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -658,7 +658,7 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "ionmesh" -version = "0.2.0" +version = "0.5.0" dependencies = [ "clap", "csv", diff --git a/Cargo.toml b/Cargo.toml index d3a6dee..7e2ddfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ionmesh" -version = "0.2.0" +version = "0.5.0" edition = "2021" license = "Apache-2.0" diff --git a/src/aggregation/ms_denoise.rs b/src/aggregation/ms_denoise.rs index e82465d..84aeb94 100644 --- a/src/aggregation/ms_denoise.rs +++ b/src/aggregation/ms_denoise.rs @@ -15,6 +15,7 @@ use serde::{ Serialize, }; use timsrust::converters::{ + ConvertableDomain, Scan2ImConverter, Tof2MzConverter, }; @@ -179,10 +180,8 @@ fn denoise_frame_slice_window( mz_converter: &Tof2MzConverter, min_n: usize, min_intensity: u64, - _mz_scaling: f64, - _max_mz_extension: f64, - _ims_scaling: f32, - _max_ims_extension: f32, + max_mz_extension: f64, + max_ims_extension: f32, ) -> DenseFrameWindow { let timer = utils::ContextTimer::new("dbscan_dfs", true, utils::LogLevel::TRACE); let fsw = FrameSliceWindow::new(frameslice_window); @@ -208,6 +207,13 @@ fn denoise_frame_slice_window( } let mut i_timer = timer.start_sub_timer("dbscan"); + // TODO make this API better ... its kind of dumb having to "know" what each index + // means in the tolerances... 
+ let max_extensions_use = [ + (mz_converter.invert(1000.0 + max_mz_extension) - mz_converter.convert(1000.0)).abs() + as f32, + (ims_converter.invert(1.0 + max_ims_extension) - ims_converter.convert(1.0)).abs() as f32, + ]; let cluster_labels = dbscan_label_clusters( &fsw, &fsw, @@ -217,7 +223,7 @@ fn denoise_frame_slice_window( intensity_sorted_indices, None::<&(dyn Fn(&f32) -> bool + Send + Sync)>, false, - &[10., 100.], + &max_extensions_use, ); i_timer.stop(true); @@ -449,8 +455,6 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> // to have them be not hard-coded I need a way to convert // m/z space ranges to tof indices ... which is not exposed // by timsrust ... - warn!("Using prototype function for denoising, scalings are hard-coded"); - let num_windows = break_points.len() - 1; let mut out = Vec::with_capacity(num_windows); let frame_window_slices: Vec<(usize, &[FrameSlice])> = (0..num_windows) @@ -468,9 +472,7 @@ impl<'a> Denoiser<'a, Frame, Vec, Converters, Option> &self.mz_converter, self.min_n, self.min_intensity, - self.mz_scaling, self.max_mz_extension, - self.ims_scaling, self.max_ims_extension, ) };