diff --git a/CHANGELOG.md b/CHANGELOG.md index b1ac4c01c..f30be65aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 -## Unreleased -### Changed +## [0.199.0] - 2024-09-06 +### Added +- Persistence has been enabled for the Task and Flow domains. + Both `TaskExecutor` and `FlowExecutor` now fully support transactional processing mode + and save their state in a Postgres or SQLite database. +- Tasks now support attaching metadata properties. The task->flow association is stored as this type of metadata. +- Flows and Tasks now properly recover unfinished requests after a server restart +### Changed +- Simplified the database schema for flow configurations and minimized the number of migrations + (breaking change of the database schema) +- Introduced a `pre_run()` phase in the flow executor, task executor & outbox processor to avoid startup races +- The explicit in-memory task queue has been eliminated and replaced with event store queries - Get Data Panel: use SmTP for pull & push links - GQL api method `setConfigCompaction` allows to set `metadataOnly` configuration for both root and derived datasets - GQL api `triggerFlow` allows to trigger `HARD_COMPACTION` flow in `metadataOnly` mode for both root and derived datasets diff --git a/Cargo.lock b/Cargo.lock index 1bf8ef283..f3b363054 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "alloy-chains" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb07629a5d0645d29f68d2fb6f4d0cf15c89ec0965be915f303967180929743f" +checksum = "2b4f201b0ac8f81315fbdc55269965a8ddadbc04ab47fa65a1a468f9a40f7a5f" dependencies = [ "num_enum", "strum 0.26.3", ] [[package]] @@ -389,7 +389,7 @@ checksum = "4d0f2d905ebd295e7effec65e5f6868d153936130ae718352771de3e7d03c75c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -481,7 +481,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -494,11 +494,11 @@ dependencies = [ "alloy-sol-macro-input", "const-hex", "heck 0.5.0", - "indexmap 2.4.0", + "indexmap 2.5.0", "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "syn-solidity", "tiny-keccak", ] @@ -516,7 +516,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.76", + "syn 2.0.77", "syn-solidity", ] @@ -721,7 +721,7 @@ dependencies = [ "num-bigint", "num-traits", "paste", - "rustc_version 0.4.0", + "rustc_version 0.4.1", "zeroize", ] @@ -1005,7 +1005,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.4.0", + "indexmap 2.5.0", "lexical-core", "num", "serde", @@ -1150,7 +1150,7 @@ dependencies = [ "futures-util", "handlebars", "http 0.2.12", - "indexmap 2.4.0", + "indexmap 2.5.0", "mime", "multer", "num-traits", @@ -1197,7 +1197,7 @@ dependencies = [ "proc-macro2", "quote", "strum 0.25.0", - "syn 2.0.76", + "syn 2.0.77", "thiserror", ] @@ -1220,7 +1220,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "323a5143f5bdd2030f45e3f2e0c821c9b1d36e79cf382129c64299c50a7f3750" dependencies = [ "bytes", - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_json", ] @@ -1233,7 +1233,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -1255,18 +1255,18 @@ checksum =
"16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -1277,7 +1277,7 @@ checksum = "b6d7b9decdf35d8908a7e3ef02f64c5e9b1695e230154c0e8de3969142d9b94c" dependencies = [ "futures", "pharos", - "rustc_version 0.4.0", + "rustc_version 0.4.1", ] [[package]] @@ -1303,7 +1303,7 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -1707,7 +1707,7 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "http 0.2.12", - "rustc_version 0.4.0", + "rustc_version 0.4.1", "tracing", ] @@ -2096,9 +2096,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.15" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +checksum = "e9d013ecb737093c0e86b151a7b837993cf9ec6c502946cfb44bedc392421e0b" dependencies = [ "jobserver", "libc", @@ -2208,9 +2208,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.16" +version = "4.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" +checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" dependencies = [ "clap_builder", "clap_derive", @@ -2218,9 +2218,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.15" +version = "4.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" +checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" dependencies = [ "anstream", "anstyle", @@ -2230,9 +2230,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.23" +version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531d7959c5bbb6e266cecdd0f20213639c3a5c3e4d615f97db87661745f781ff" +checksum = "205d5ef6d485fa47606b98b0ddc4ead26eb850aaa86abfb562a94fb3280ecba0" dependencies = [ "clap", ] @@ -2246,7 +2246,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2368,7 +2368,7 @@ checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "container-runtime" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "cfg-if", @@ -2430,9 +2430,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -2458,7 +2458,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" dependencies = [ - "rustc_version 0.4.0", + 
"rustc_version 0.4.1", ] [[package]] @@ -2709,7 +2709,7 @@ dependencies = [ "curve25519-dalek-derive", "digest 0.10.7", "fiat-crypto", - "rustc_version 0.4.0", + "rustc_version 0.4.1", "subtle", "zeroize", ] @@ -2722,7 +2722,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2752,7 +2752,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2763,7 +2763,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2787,9 +2787,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.0.1" +version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" dependencies = [ "cfg-if", "crossbeam-utils", @@ -2807,7 +2807,7 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "database-common" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "aws-config", @@ -2831,10 +2831,10 @@ dependencies = [ [[package]] name = "database-common-macros" -version = "0.198.2" +version = "0.199.0" dependencies = [ "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -2853,7 +2853,7 @@ dependencies = [ "bytes", "bzip2", "chrono", - "dashmap 6.0.1", + "dashmap 6.1.0", "datafusion-catalog", "datafusion-common", "datafusion-common-runtime", @@ -2873,7 +2873,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "num_cpus", @@ -2963,7 +2963,7 @@ checksum = "799e70968c815b611116951e3dd876aef04bf217da31b72eec01ee6a959336a1" dependencies = [ "arrow", "chrono", - "dashmap 6.0.1", + "dashmap 6.1.0", "datafusion-common", "datafusion-expr", "futures", @@ -3108,7 +3108,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "paste", @@ -3137,7 +3137,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "paste", @@ -3195,7 +3195,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.4.0", + "indexmap 2.5.0", "itertools 0.12.1", "log", "once_cell", @@ -3294,8 +3294,8 @@ dependencies = [ "convert_case", "proc-macro2", "quote", - "rustc_version 0.4.0", - "syn 2.0.76", + "rustc_version 0.4.1", + "syn 2.0.77", ] [[package]] @@ -3350,7 +3350,7 @@ checksum = "9d0e68e1e07d64dbf3bb2991657979ec4e3fe13b7b3c18067b802052af1330a3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -3581,12 +3581,12 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "enum-variants" -version = "0.198.2" +version = "0.199.0" [[package]] name = "env_filter" @@ -3659,7 +3659,7 @@ dependencies = [ [[package]] name = "event-sourcing" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -3675,10 +3675,10 @@ dependencies = [ [[package]] name = "event-sourcing-macros" -version = "0.198.2" +version = "0.199.0" dependencies = [ "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -3746,9 +3746,9 @@ checksum = 
"28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" dependencies = [ "cfg-if", "libc", @@ -3781,7 +3781,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ "bitflags 1.3.2", - "rustc_version 0.4.0", + "rustc_version 0.4.1", ] [[package]] @@ -3914,7 +3914,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -4104,7 +4104,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.4.0", + "indexmap 2.5.0", "slab", "tokio", "tokio-util", @@ -4123,7 +4123,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.4.0", + "indexmap 2.5.0", "slab", "tokio", "tokio-util", @@ -4339,7 +4339,7 @@ dependencies = [ [[package]] name = "http-common" -version = "0.198.2" +version = "0.199.0" dependencies = [ "axum", "http 0.2.12", @@ -4444,16 +4444,16 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http 1.1.0", "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.2", + "rustls-native-certs 0.8.0", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -4581,7 +4581,7 @@ dependencies = [ "libflate", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -4597,9 +4597,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -4660,7 +4660,7 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "internal-error" -version = "0.198.2" +version = "0.199.0" dependencies = [ "thiserror", ] @@ -4817,7 +4817,7 @@ dependencies = [ [[package]] name = "kamu" -version = "0.198.2" +version = "0.199.0" dependencies = [ "alloy", "async-recursion", @@ -4836,7 +4836,7 @@ dependencies = [ "criterion", "curl", "curl-sys", - "dashmap 6.0.1", + "dashmap 6.1.0", "database-common", "datafusion", "datafusion-ethers", @@ -4904,7 +4904,7 @@ dependencies = [ [[package]] name = "kamu-accounts" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "base32", @@ -4930,7 +4930,7 @@ dependencies = [ [[package]] name = "kamu-accounts-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -4951,7 +4951,7 @@ dependencies = [ [[package]] name = "kamu-accounts-mysql" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -4972,7 +4972,7 @@ dependencies = [ [[package]] name = "kamu-accounts-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -4993,7 
+4993,7 @@ dependencies = [ [[package]] name = "kamu-accounts-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "argon2", "chrono", @@ -5009,7 +5009,7 @@ dependencies = [ [[package]] name = "kamu-accounts-services" -version = "0.198.2" +version = "0.199.0" dependencies = [ "argon2", "async-trait", @@ -5035,7 +5035,7 @@ dependencies = [ [[package]] name = "kamu-accounts-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5056,7 +5056,7 @@ dependencies = [ [[package]] name = "kamu-adapter-auth-oso" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "dill", @@ -5078,12 +5078,12 @@ dependencies = [ [[package]] name = "kamu-adapter-flight-sql" -version = "0.198.2" +version = "0.199.0" dependencies = [ "arrow-flight", "async-trait", "base64 0.22.1", - "dashmap 6.0.1", + "dashmap 6.1.0", "datafusion", "futures", "indoc 2.0.5", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "kamu-adapter-graphql" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-graphql", "async-trait", @@ -5151,7 +5151,7 @@ dependencies = [ [[package]] name = "kamu-adapter-http" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "aws-sdk-s3", @@ -5211,7 +5211,7 @@ dependencies = [ [[package]] name = "kamu-adapter-oauth" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5230,7 +5230,7 @@ dependencies = [ [[package]] name = "kamu-adapter-odata" -version = "0.198.2" +version = "0.199.0" dependencies = [ "axum", "chrono", @@ -5265,7 +5265,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "internal-error", @@ -5277,7 +5277,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "database-common-macros", @@ -5291,7 +5291,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "dill", "kamu-auth-rebac", @@ -5300,7 +5300,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-services" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "dill", @@ -5319,7 +5319,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "database-common", @@ -5336,7 +5336,7 @@ dependencies = [ [[package]] name = "kamu-cli" -version = "0.198.2" +version = "0.199.0" dependencies = [ "arrow-flight", "async-graphql", @@ -5402,7 +5402,9 @@ dependencies = [ "kamu-messaging-outbox-postgres", "kamu-messaging-outbox-sqlite", "kamu-task-system-inmem", + "kamu-task-system-postgres", "kamu-task-system-services", + "kamu-task-system-sqlite", "libc", "merge", "messaging-outbox", @@ -5450,7 +5452,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-common" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", "indoc 2.0.5", @@ -5470,15 +5472,15 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-common-macros" -version = "0.198.2" +version = "0.199.0" dependencies = [ "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "kamu-cli-e2e-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5491,7 +5493,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-mysql" -version = "0.198.2" +version = "0.199.0" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5505,7 +5507,7 @@ 
dependencies = [ [[package]] name = "kamu-cli-e2e-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5519,7 +5521,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", "indoc 2.0.5", @@ -5535,7 +5537,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5549,7 +5551,7 @@ dependencies = [ [[package]] name = "kamu-cli-puppet" -version = "0.198.2" +version = "0.199.0" dependencies = [ "assert_cmd", "async-trait", @@ -5565,7 +5567,7 @@ dependencies = [ [[package]] name = "kamu-core" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -5595,7 +5597,7 @@ dependencies = [ [[package]] name = "kamu-data-utils" -version = "0.198.2" +version = "0.199.0" dependencies = [ "arrow", "arrow-digest", @@ -5620,7 +5622,7 @@ dependencies = [ [[package]] name = "kamu-datafusion-cli" -version = "0.198.2" +version = "0.199.0" dependencies = [ "arrow", "async-trait", @@ -5642,7 +5644,7 @@ dependencies = [ [[package]] name = "kamu-datasets" -version = "0.198.2" +version = "0.199.0" dependencies = [ "aes-gcm", "async-trait", @@ -5661,7 +5663,7 @@ dependencies = [ [[package]] name = "kamu-datasets-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5684,7 +5686,7 @@ dependencies = [ [[package]] name = "kamu-datasets-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5706,7 +5708,7 @@ dependencies = [ [[package]] name = "kamu-datasets-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", "database-common", @@ -5720,7 +5722,7 @@ dependencies = [ [[package]] name = "kamu-datasets-services" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5741,7 +5743,7 @@ dependencies = [ [[package]] name = "kamu-datasets-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5764,12 +5766,14 @@ dependencies = [ [[package]] name = "kamu-flow-system" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", "cron", + "database-common", "datafusion", + "dill", "enum-variants", "event-sourcing", "internal-error", @@ -5790,19 +5794,20 @@ dependencies = [ [[package]] name = "kamu-flow-system-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", "chrono", "cron", + "database-common", "database-common-macros", "dill", "futures", + "internal-error", "kamu-flow-system", "kamu-flow-system-repo-tests", "kamu-task-system", - "kamu-task-system-inmem", "opendatafabric", "serde", "serde_with", @@ -5819,7 +5824,7 @@ dependencies = [ [[package]] name = "kamu-flow-system-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -5843,18 +5848,20 @@ dependencies = [ [[package]] name = "kamu-flow-system-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", + "database-common", "dill", "futures", "kamu-flow-system", + "kamu-task-system", "opendatafabric", ] [[package]] name = "kamu-flow-system-services" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -5896,7 +5903,7 @@ dependencies = [ [[package]] name = "kamu-flow-system-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ 
"async-stream", "async-trait", @@ -5920,7 +5927,7 @@ dependencies = [ [[package]] name = "kamu-ingest-datafusion" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5957,7 +5964,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -5976,7 +5983,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -5999,7 +6006,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", "database-common", @@ -6013,7 +6020,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -6035,7 +6042,7 @@ dependencies = [ [[package]] name = "kamu-repo-tools" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", "clap", @@ -6050,23 +6057,25 @@ dependencies = [ [[package]] name = "kamu-task-system" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", + "database-common", "enum-variants", "event-sourcing", "kamu-core", "messaging-outbox", "opendatafabric", "serde", + "sqlx", "thiserror", "tokio-stream", ] [[package]] name = "kamu-task-system-inmem" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -6085,7 +6094,7 @@ dependencies = [ [[package]] name = "kamu-task-system-postgres" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -6108,9 +6117,10 @@ dependencies = [ [[package]] name = "kamu-task-system-repo-tests" -version = "0.198.2" +version = "0.199.0" dependencies = [ "chrono", + "database-common", "dill", "futures", "kamu-task-system", @@ -6119,7 +6129,7 @@ dependencies = [ [[package]] name = "kamu-task-system-services" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -6133,6 +6143,7 @@ dependencies = [ "kamu-task-system", "kamu-task-system-inmem", "messaging-outbox", + "mockall", "opendatafabric", "serde_json", "test-log", @@ -6143,7 +6154,7 @@ dependencies = [ [[package]] name = "kamu-task-system-sqlite" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-stream", "async-trait", @@ -6328,7 +6339,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.6.0", "libc", - "redox_syscall 0.5.3", + "redox_syscall", ] [[package]] @@ -6533,7 +6544,7 @@ dependencies = [ [[package]] name = "messaging-outbox" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -6680,7 +6691,7 @@ dependencies = [ [[package]] name = "multiformats" -version = "0.198.2" +version = "0.199.0" dependencies = [ "bs58", "digest 0.10.7", @@ -6915,7 +6926,7 @@ checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -6944,9 +6955,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.3" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "memchr", ] @@ -7005,7 +7016,7 @@ checksum = 
"c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "opendatafabric" -version = "0.198.2" +version = "0.199.0" dependencies = [ "arrow", "base64 0.22.1", @@ -7041,9 +7052,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.1+3.3.1" +version = "300.3.2+3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" +checksum = "a211a18d945ef7e648cc6e0058f4c548ee46aab922ea203e0d30e966ea23647b" dependencies = [ "cc", ] @@ -7174,7 +7185,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.3", + "redox_syscall", "smallvec", "windows-targets 0.52.6", ] @@ -7326,7 +7337,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7347,7 +7358,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.4.0", + "indexmap 2.5.0", ] [[package]] @@ -7357,7 +7368,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9567389417feee6ce15dd6527a8a1ecac205ef62c2932bcf3d9f6fc5b78b414" dependencies = [ "futures", - "rustc_version 0.4.0", + "rustc_version 0.4.1", ] [[package]] @@ -7424,7 +7435,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7727,7 +7738,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -7757,9 +7768,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.3" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b22d8e7369034b9a7132bc2008cac12f2013c8132b45e0554e6e20e2617f2156" +checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" dependencies = [ "bytes", "pin-project-lite", @@ -7775,9 +7786,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", @@ -7792,15 +7803,15 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" +checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" dependencies = [ "libc", "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -7869,7 +7880,7 @@ dependencies = [ [[package]] name = "random-names" -version = "0.198.2" +version = "0.199.0" dependencies = [ "rand", ] @@ -7906,15 +7917,6 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f178674da3d005db760b30d6735a989d692da37b86337daec6f2e311223d608" -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] 
- [[package]] name = "redox_syscall" version = "0.5.3" @@ -8039,7 +8041,7 @@ dependencies = [ "http-body 1.0.1", "http-body-util", "hyper 1.4.1", - "hyper-rustls 0.27.2", + "hyper-rustls 0.27.3", "hyper-util", "ipnet", "js-sys", @@ -8050,7 +8052,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.2", + "rustls-native-certs 0.7.3", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -8241,7 +8243,7 @@ dependencies = [ "quote", "rust-embed-utils", "shellexpand", - "syn 2.0.76", + "syn 2.0.77", "walkdir", ] @@ -8284,18 +8286,18 @@ dependencies = [ [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver 1.0.23", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags 2.6.0", "errno", @@ -8325,7 +8327,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.6", + "rustls-webpki 0.102.7", "subtle", "zeroize", ] @@ -8344,9 +8346,22 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.1.3", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -8392,9 +8407,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.6" +version = "0.102.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" dependencies = [ "ring", "rustls-pki-types", @@ -8598,14 +8613,14 @@ checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -8654,7 +8669,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_derive", "serde_json", @@ -8671,7 +8686,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -8680,7 +8695,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 
2.4.0", + "indexmap 2.5.0", "itoa", "ryu", "serde", @@ -8945,14 +8960,14 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "sqlx" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcfa89bea9500db4a0d038513d7a060566bfc51d46d1c014847049a45cce85e8" +checksum = "93334716a037193fac19df402f8571269c84a00852f6a7066b5d2616dcd64d3e" dependencies = [ "sqlx-core", "sqlx-macros", @@ -8963,9 +8978,9 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d06e2f2bd861719b1f3f0c7dbe1d80c30bf59e76cf019f07d9014ed7eefb8e08" +checksum = "d4d8060b456358185f7d50c55d9b5066ad956956fddec42ee2e8567134a8936e" dependencies = [ "atoi", "byteorder", @@ -8983,7 +8998,7 @@ dependencies = [ "hashbrown 0.14.5", "hashlink", "hex", - "indexmap 2.4.0", + "indexmap 2.5.0", "log", "memchr", "once_cell", @@ -9002,27 +9017,27 @@ dependencies = [ "tracing", "url", "uuid", - "webpki-roots 0.26.3", + "webpki-roots 0.26.5", ] [[package]] name = "sqlx-macros" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f998a9defdbd48ed005a89362bd40dd2117502f15294f61c8d47034107dbbdc" +checksum = "cac0692bcc9de3b073e8d747391827297e075c7710ff6276d9f7a1f3d58c6657" dependencies = [ "proc-macro2", "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] name = "sqlx-macros-core" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d100558134176a2629d46cec0c8891ba0be8910f7896abfdb75ef4ab6f4e7ce" +checksum = "1804e8a7c7865599c9c79be146dc8a9fd8cc86935fa641d3ea58e5f0688abaa5" dependencies = [ "dotenvy", "either", @@ -9038,7 +9053,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.76", + "syn 2.0.77", "tempfile", "tokio", "url", @@ -9046,9 +9061,9 @@ dependencies = [ [[package]] name = "sqlx-mysql" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cac0ab331b14cb3921c62156d913e4c15b74fb6ec0f3146bd4ef6e4fb3c12" +checksum = "64bb4714269afa44aef2755150a0fc19d756fb580a67db8885608cf02f47d06a" dependencies = [ "atoi", "base64 0.22.1", @@ -9090,9 +9105,9 @@ dependencies = [ [[package]] name = "sqlx-postgres" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9734dbce698c67ecf67c442f768a5e90a49b2a4d61a9f1d59f73874bd4cf0710" +checksum = "6fa91a732d854c5d7726349bb4bb879bb9478993ceb764247660aee25f67c2f8" dependencies = [ "atoi", "base64 0.22.1", @@ -9130,9 +9145,9 @@ dependencies = [ [[package]] name = "sqlx-sqlite" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75b419c3c1b1697833dd927bdc4c6545a620bc1bbafabd44e1efbe9afcd337e" +checksum = "d5b2cf34a45953bfd3daaf3db0f7a7878ab9b7a6b91b422d24a7a9e4c857b680" dependencies = [ "atoi", "chrono", @@ -9236,7 +9251,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9249,7 +9264,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9271,9 +9286,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.76" +version = "2.0.77" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -9289,7 +9304,7 @@ dependencies = [ "paste", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9385,7 +9400,7 @@ dependencies = [ "proc-macro2", "quote", "sha2", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9407,7 +9422,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9436,7 +9451,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9504,7 +9519,7 @@ dependencies = [ [[package]] name = "time-source" -version = "0.198.2" +version = "0.199.0" dependencies = [ "async-trait", "chrono", @@ -9549,9 +9564,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.3" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", @@ -9583,7 +9598,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9620,9 +9635,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.15" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" dependencies = [ "futures-core", "pin-project-lite", @@ -9658,14 +9673,14 @@ dependencies = [ "tokio", "tokio-rustls 0.26.0", "tungstenite 0.23.0", - "webpki-roots 0.26.3", + "webpki-roots 0.26.5", ] [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ "bytes", "futures-core", @@ -9702,7 +9717,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.4.0", + "indexmap 2.5.0", "toml_datetime", "winnow 0.5.40", ] @@ -9713,7 +9728,7 @@ version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ - "indexmap 2.4.0", + "indexmap 2.5.0", "serde", "serde_spanned", "toml_datetime", @@ -9836,7 +9851,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -9891,7 +9906,7 @@ dependencies = [ [[package]] name = "tracing-perfetto" -version = "0.198.2" +version = "0.199.0" dependencies = [ "conv", "serde", @@ -10200,7 +10215,7 @@ dependencies = [ "cargo_metadata", "cfg-if", "regex", - "rustc_version 0.4.0", + "rustc_version 0.4.1", 
"rustversion", "time", ] @@ -10279,7 +10294,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "wasm-bindgen-shared", ] @@ -10313,7 +10328,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -10372,20 +10387,20 @@ checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" [[package]] name = "webpki-roots" -version = "0.26.3" +version = "0.26.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" +checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a" dependencies = [ "rustls-pki-types", ] [[package]] name = "whoami" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +checksum = "372d5b87f58ec45c384ba03563b03544dc5fadc3983e434b286913f5b4a9bb6d" dependencies = [ - "redox_syscall 0.4.1", + "redox_syscall", "wasite", "web-sys", ] @@ -10719,7 +10734,7 @@ dependencies = [ "js-sys", "log", "pharos", - "rustc_version 0.4.0", + "rustc_version 0.4.1", "send_wrapper", "thiserror", "wasm-bindgen", @@ -10786,7 +10801,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] @@ -10806,7 +10821,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.76", + "syn 2.0.77", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d03ec0061..dc2e79089 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -88,91 +88,91 @@ resolver = "2" [workspace.dependencies] # Apps -kamu-cli = { version = "0.198.2", path = "src/app/cli", default-features = false } +kamu-cli = { version = "0.199.0", path = "src/app/cli", default-features = false } # Utils -container-runtime = { version = "0.198.2", path = "src/utils/container-runtime", default-features = false } -database-common = { version = "0.198.2", path = "src/utils/database-common", default-features = false } -database-common-macros = { version = "0.198.2", path = "src/utils/database-common-macros", default-features = false } -enum-variants = { version = "0.198.2", path = "src/utils/enum-variants", default-features = false } -event-sourcing = { version = "0.198.2", path = "src/utils/event-sourcing", default-features = false } -event-sourcing-macros = { version = "0.198.2", path = "src/utils/event-sourcing-macros", default-features = false } -http-common = { version = "0.198.2", path = "src/utils/http-common", default-features = false } -internal-error = { version = "0.198.2", path = "src/utils/internal-error", default-features = false } -kamu-cli-puppet = { version = "0.198.2", path = "src/utils/kamu-cli-puppet", default-features = false } -kamu-data-utils = { version = "0.198.2", path = "src/utils/data-utils", default-features = false } -kamu-datafusion-cli = { version = "0.198.2", path = "src/utils/datafusion-cli", default-features = false } -messaging-outbox = { version = "0.198.2", path = "src/utils/messaging-outbox", default-features = false } -multiformats = { version = "0.198.2", path = "src/utils/multiformats", default-features = false } -random-names = { version = "0.198.2", path = "src/utils/random-names", default-features = 
false } -time-source = { version = "0.198.2", path = "src/utils/time-source", default-features = false } -tracing-perfetto = { version = "0.198.2", path = "src/utils/tracing-perfetto", default-features = false } +container-runtime = { version = "0.199.0", path = "src/utils/container-runtime", default-features = false } +database-common = { version = "0.199.0", path = "src/utils/database-common", default-features = false } +database-common-macros = { version = "0.199.0", path = "src/utils/database-common-macros", default-features = false } +enum-variants = { version = "0.199.0", path = "src/utils/enum-variants", default-features = false } +event-sourcing = { version = "0.199.0", path = "src/utils/event-sourcing", default-features = false } +event-sourcing-macros = { version = "0.199.0", path = "src/utils/event-sourcing-macros", default-features = false } +http-common = { version = "0.199.0", path = "src/utils/http-common", default-features = false } +internal-error = { version = "0.199.0", path = "src/utils/internal-error", default-features = false } +kamu-cli-puppet = { version = "0.199.0", path = "src/utils/kamu-cli-puppet", default-features = false } +kamu-data-utils = { version = "0.199.0", path = "src/utils/data-utils", default-features = false } +kamu-datafusion-cli = { version = "0.199.0", path = "src/utils/datafusion-cli", default-features = false } +messaging-outbox = { version = "0.199.0", path = "src/utils/messaging-outbox", default-features = false } +multiformats = { version = "0.199.0", path = "src/utils/multiformats", default-features = false } +random-names = { version = "0.199.0", path = "src/utils/random-names", default-features = false } +time-source = { version = "0.199.0", path = "src/utils/time-source", default-features = false } +tracing-perfetto = { version = "0.199.0", path = "src/utils/tracing-perfetto", default-features = false } # Domain -kamu-accounts = { version = "0.198.2", path = "src/domain/accounts/domain", default-features = false } -kamu-auth-rebac = { version = "0.198.2", path = "src/domain/auth-rebac/domain", default-features = false } -kamu-core = { version = "0.198.2", path = "src/domain/core", default-features = false } -kamu-datasets = { version = "0.198.2", path = "src/domain/datasets/domain", default-features = false } -kamu-flow-system = { version = "0.198.2", path = "src/domain/flow-system/domain", default-features = false } -kamu-task-system = { version = "0.198.2", path = "src/domain/task-system/domain", default-features = false } -opendatafabric = { version = "0.198.2", path = "src/domain/opendatafabric", default-features = false } +kamu-accounts = { version = "0.199.0", path = "src/domain/accounts/domain", default-features = false } +kamu-auth-rebac = { version = "0.199.0", path = "src/domain/auth-rebac/domain", default-features = false } +kamu-core = { version = "0.199.0", path = "src/domain/core", default-features = false } +kamu-datasets = { version = "0.199.0", path = "src/domain/datasets/domain", default-features = false } +kamu-flow-system = { version = "0.199.0", path = "src/domain/flow-system/domain", default-features = false } +kamu-task-system = { version = "0.199.0", path = "src/domain/task-system/domain", default-features = false } +opendatafabric = { version = "0.199.0", path = "src/domain/opendatafabric", default-features = false } # Domain service layer -kamu-accounts-services = { version = "0.198.2", path = "src/domain/accounts/services", default-features = false } -kamu-auth-rebac-services = { version = "0.198.2", path = 
"src/domain/auth-rebac/services", default-features = false } -kamu-datasets-services = { version = "0.198.2", path = "src/domain/datasets/services", default-features = false } -kamu-flow-system-services = { version = "0.198.2", path = "src/domain/flow-system/services", default-features = false } -kamu-task-system-services = { version = "0.198.2", path = "src/domain/task-system/services", default-features = false } +kamu-accounts-services = { version = "0.199.0", path = "src/domain/accounts/services", default-features = false } +kamu-auth-rebac-services = { version = "0.199.0", path = "src/domain/auth-rebac/services", default-features = false } +kamu-datasets-services = { version = "0.199.0", path = "src/domain/datasets/services", default-features = false } +kamu-flow-system-services = { version = "0.199.0", path = "src/domain/flow-system/services", default-features = false } +kamu-task-system-services = { version = "0.199.0", path = "src/domain/task-system/services", default-features = false } # Infra -kamu = { version = "0.198.2", path = "src/infra/core", default-features = false } -kamu-ingest-datafusion = { version = "0.198.2", path = "src/infra/ingest-datafusion", default-features = false } +kamu = { version = "0.199.0", path = "src/infra/core", default-features = false } +kamu-ingest-datafusion = { version = "0.199.0", path = "src/infra/ingest-datafusion", default-features = false } ## Flow System -kamu-flow-system-repo-tests = { version = "0.198.2", path = "src/infra/flow-system/repo-tests", default-features = false } -kamu-flow-system-inmem = { version = "0.198.2", path = "src/infra/flow-system/inmem", default-features = false } -kamu-flow-system-postgres = { version = "0.198.2", path = "src/infra/flow-system/postgres", default-features = false } -kamu-flow-system-sqlite = { version = "0.198.2", path = "src/infra/flow-system/sqlite", default-features = false } +kamu-flow-system-repo-tests = { version = "0.199.0", path = "src/infra/flow-system/repo-tests", default-features = false } +kamu-flow-system-inmem = { version = "0.199.0", path = "src/infra/flow-system/inmem", default-features = false } +kamu-flow-system-postgres = { version = "0.199.0", path = "src/infra/flow-system/postgres", default-features = false } +kamu-flow-system-sqlite = { version = "0.199.0", path = "src/infra/flow-system/sqlite", default-features = false } ## Accounts -kamu-accounts-inmem = { version = "0.198.2", path = "src/infra/accounts/inmem", default-features = false } -kamu-accounts-mysql = { version = "0.198.2", path = "src/infra/accounts/mysql", default-features = false } -kamu-accounts-postgres = { version = "0.198.2", path = "src/infra/accounts/postgres", default-features = false } -kamu-accounts-sqlite = { version = "0.198.2", path = "src/infra/accounts/sqlite", default-features = false } -kamu-accounts-repo-tests = { version = "0.198.2", path = "src/infra/accounts/repo-tests", default-features = false } +kamu-accounts-inmem = { version = "0.199.0", path = "src/infra/accounts/inmem", default-features = false } +kamu-accounts-mysql = { version = "0.199.0", path = "src/infra/accounts/mysql", default-features = false } +kamu-accounts-postgres = { version = "0.199.0", path = "src/infra/accounts/postgres", default-features = false } +kamu-accounts-sqlite = { version = "0.199.0", path = "src/infra/accounts/sqlite", default-features = false } +kamu-accounts-repo-tests = { version = "0.199.0", path = "src/infra/accounts/repo-tests", default-features = false } ## Datasets -kamu-datasets-inmem = { version = 
"0.198.2", path = "src/infra/datasets/inmem", default-features = false } -kamu-datasets-postgres = { version = "0.198.2", path = "src/infra/datasets/postgres", default-features = false } -kamu-datasets-sqlite = { version = "0.198.2", path = "src/infra/datasets/sqlite", default-features = false } -kamu-datasets-repo-tests = { version = "0.198.2", path = "src/infra/datasets/repo-tests", default-features = false } +kamu-datasets-inmem = { version = "0.199.0", path = "src/infra/datasets/inmem", default-features = false } +kamu-datasets-postgres = { version = "0.199.0", path = "src/infra/datasets/postgres", default-features = false } +kamu-datasets-sqlite = { version = "0.199.0", path = "src/infra/datasets/sqlite", default-features = false } +kamu-datasets-repo-tests = { version = "0.199.0", path = "src/infra/datasets/repo-tests", default-features = false } ## Task System -kamu-task-system-inmem = { version = "0.198.2", path = "src/infra/task-system/inmem", default-features = false } -kamu-task-system-postgres = { version = "0.198.2", path = "src/infra/task-system/postgres", default-features = false } -kamu-task-system-sqlite = { version = "0.198.2", path = "src/infra/task-system/sqlite", default-features = false } -kamu-task-system-repo-tests = { version = "0.198.2", path = "src/infra/task-system/repo-tests", default-features = false } +kamu-task-system-inmem = { version = "0.199.0", path = "src/infra/task-system/inmem", default-features = false } +kamu-task-system-postgres = { version = "0.199.0", path = "src/infra/task-system/postgres", default-features = false } +kamu-task-system-sqlite = { version = "0.199.0", path = "src/infra/task-system/sqlite", default-features = false } +kamu-task-system-repo-tests = { version = "0.199.0", path = "src/infra/task-system/repo-tests", default-features = false } ## ReBAC -kamu-auth-rebac-inmem = { version = "0.198.2", path = "src/infra/auth-rebac/inmem", default-features = false } -kamu-auth-rebac-repo-tests = { version = "0.198.2", path = "src/infra/auth-rebac/repo-tests", default-features = false } -kamu-auth-rebac-sqlite = { version = "0.198.2", path = "src/infra/auth-rebac/sqlite", default-features = false } +kamu-auth-rebac-inmem = { version = "0.199.0", path = "src/infra/auth-rebac/inmem", default-features = false } +kamu-auth-rebac-repo-tests = { version = "0.199.0", path = "src/infra/auth-rebac/repo-tests", default-features = false } +kamu-auth-rebac-sqlite = { version = "0.199.0", path = "src/infra/auth-rebac/sqlite", default-features = false } ## Outbox -kamu-messaging-outbox-inmem = { version = "0.198.2", path = "src/infra/messaging-outbox/inmem", default-features = false } -kamu-messaging-outbox-postgres = { version = "0.198.2", path = "src/infra/messaging-outbox/postgres", default-features = false } -kamu-messaging-outbox-sqlite = { version = "0.198.2", path = "src/infra/messaging-outbox/sqlite", default-features = false } -kamu-messaging-outbox-repo-tests = { version = "0.198.2", path = "src/infra/messaging-outbox/repo-tests", default-features = false } +kamu-messaging-outbox-inmem = { version = "0.199.0", path = "src/infra/messaging-outbox/inmem", default-features = false } +kamu-messaging-outbox-postgres = { version = "0.199.0", path = "src/infra/messaging-outbox/postgres", default-features = false } +kamu-messaging-outbox-sqlite = { version = "0.199.0", path = "src/infra/messaging-outbox/sqlite", default-features = false } +kamu-messaging-outbox-repo-tests = { version = "0.199.0", path = "src/infra/messaging-outbox/repo-tests", 
default-features = false } # Adapters -kamu-adapter-auth-oso = { version = "0.198.2", path = "src/adapter/auth-oso", default-features = false } -kamu-adapter-flight-sql = { version = "0.198.2", path = "src/adapter/flight-sql", default-features = false } -kamu-adapter-graphql = { version = "0.198.2", path = "src/adapter/graphql", default-features = false } -kamu-adapter-http = { version = "0.198.2", path = "src/adapter/http", default-features = false } -kamu-adapter-odata = { version = "0.198.2", path = "src/adapter/odata", default-features = false } -kamu-adapter-oauth = { version = "0.198.2", path = "src/adapter/oauth", default-features = false } +kamu-adapter-auth-oso = { version = "0.199.0", path = "src/adapter/auth-oso", default-features = false } +kamu-adapter-flight-sql = { version = "0.199.0", path = "src/adapter/flight-sql", default-features = false } +kamu-adapter-graphql = { version = "0.199.0", path = "src/adapter/graphql", default-features = false } +kamu-adapter-http = { version = "0.199.0", path = "src/adapter/http", default-features = false } +kamu-adapter-odata = { version = "0.199.0", path = "src/adapter/odata", default-features = false } +kamu-adapter-oauth = { version = "0.199.0", path = "src/adapter/oauth", default-features = false } # E2E -kamu-cli-e2e-common = { version = "0.198.2", path = "src/e2e/app/cli/common", default-features = false } -kamu-cli-e2e-common-macros = { version = "0.198.2", path = "src/e2e/app/cli/common-macros", default-features = false } -kamu-cli-e2e-repo-tests = { version = "0.198.2", path = "src/e2e/app/cli/repo-tests", default-features = false } +kamu-cli-e2e-common = { version = "0.199.0", path = "src/e2e/app/cli/common", default-features = false } +kamu-cli-e2e-common-macros = { version = "0.199.0", path = "src/e2e/app/cli/common-macros", default-features = false } +kamu-cli-e2e-repo-tests = { version = "0.199.0", path = "src/e2e/app/cli/repo-tests", default-features = false } [workspace.package] -version = "0.198.2" +version = "0.199.0" edition = "2021" homepage = "https://github.com/kamu-data/kamu-cli" repository = "https://github.com/kamu-data/kamu-cli" diff --git a/LICENSE.txt b/LICENSE.txt index 04c2e52a3..d3cd87937 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -11,7 +11,7 @@ Business Source License 1.1 Licensor: Kamu Data, Inc. -Licensed Work: Kamu CLI Version 0.198.2 +Licensed Work: Kamu CLI Version 0.199.0 The Licensed Work is © 2023 Kamu Data, Inc. Additional Use Grant: You may use the Licensed Work for any purpose, @@ -24,7 +24,7 @@ Additional Use Grant: You may use the Licensed Work for any purpose, Licensed Work where data or transformations are controlled by such third parties. 
-Change Date: 2028-08-27 +Change Date: 2028-09-06 Change License: Apache License, Version 2.0 diff --git a/Makefile b/Makefile index ee743adbb..2bcd01707 100644 --- a/Makefile +++ b/Makefile @@ -155,6 +155,10 @@ test-fast: test-e2e: $(TEST_LOG_PARAMS) cargo nextest run -E 'test(::e2e::)' +.PHONY: test-database +test-database: + $(TEST_LOG_PARAMS) cargo nextest run -E 'test(::database::)' + ############################################################################### # Benchmarking ############################################################################### diff --git a/deny.toml b/deny.toml index fccb3a1b9..22663e4f4 100644 --- a/deny.toml +++ b/deny.toml @@ -111,5 +111,6 @@ allow-org = { github = [ yanked = "deny" # TODO: Remove when patches are available ignore = [ - "RUSTSEC-2023-0071", # https://rustsec.org/advisories/RUSTSEC-2023-0071.html + "RUSTSEC-2023-0071", # https://rustsec.org/advisories/RUSTSEC-2023-0071.html, + "RUSTSEC-2024-0370", # https://rustsec.org/advisories/RUSTSEC-2024-0370.html ] diff --git a/migrations/mysql/20240315194839_accounts_table.sql b/migrations/mysql/20240315194839_accounts_table.sql index 0c9717446..0e2d848a9 100644 --- a/migrations/mysql/20240315194839_accounts_table.sql +++ b/migrations/mysql/20240315194839_accounts_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE accounts( id VARCHAR(100) NOT NULL PRIMARY KEY, account_name VARCHAR(100) NOT NULL, @@ -13,8 +15,9 @@ CREATE TABLE accounts( CREATE UNIQUE INDEX idx_accounts_name ON accounts(account_name); CREATE UNIQUE INDEX idx_accounts_email ON accounts(email); -CREATE UNIQUE INDEX idx_provider_identity_key ON accounts(provider_identity_key); +CREATE UNIQUE INDEX idx_accounts_provider_identity_key ON accounts(provider_identity_key); +/* ------------------------------ */ CREATE TABLE accounts_passwords( account_name VARCHAR(100) NOT NULL, @@ -22,3 +25,5 @@ CREATE TABLE accounts_passwords( ); CREATE UNIQUE INDEX idx_accounts_passwords_account_name ON accounts_passwords(account_name); + +/* ------------------------------ */ diff --git a/migrations/mysql/20240603102429_access_tokens_table.sql b/migrations/mysql/20240603102429_access_tokens_table.sql index 9541faced..c029bc9cc 100644 --- a/migrations/mysql/20240603102429_access_tokens_table.sql +++ b/migrations/mysql/20240603102429_access_tokens_table.sql @@ -1,9 +1,14 @@ +/* ------------------------------ */ + CREATE TABLE access_tokens ( id CHAR(36) NOT NULL PRIMARY KEY, token_name VARCHAR(100) NOT NULL, token_hash BINARY(32) NOT NULL, created_at TIMESTAMP(6) NOT NULL, revoked_at TIMESTAMP(6), + revoked_at_is_null BOOLEAN GENERATED ALWAYS AS (revoked_at IS NULL), account_id VARCHAR(100) NOT NULL REFERENCES accounts(id), - UNIQUE KEY idx_account_token_name (account_id, token_name) -); \ No newline at end of file + UNIQUE KEY idx_account_token_name (account_id, token_name, revoked_at_is_null) +); + +/* ------------------------------ */ diff --git a/migrations/mysql/20240823165328_modify_access_token_uniq.sql b/migrations/mysql/20240823165328_modify_access_token_uniq.sql deleted file mode 100644 index 163ed3a3f..000000000 --- a/migrations/mysql/20240823165328_modify_access_token_uniq.sql +++ /dev/null @@ -1,5 +0,0 @@ -ALTER TABLE access_tokens - ADD revoked_at_is_null BOOLEAN GENERATED ALWAYS AS (revoked_at IS NULL); - -CREATE OR REPLACE UNIQUE INDEX idx_account_token_name - ON access_tokens (account_id, token_name, revoked_at_is_null); diff --git a/migrations/postgres/20240314140913_accounts_table.sql 
b/migrations/postgres/20240314140913_accounts_table.sql index d6fcf666d..6a8f2df40 100644 --- a/migrations/postgres/20240314140913_accounts_table.sql +++ b/migrations/postgres/20240314140913_accounts_table.sql @@ -1,5 +1,9 @@ +/* ------------------------------ */ + CREATE TYPE account_type AS ENUM ('user', 'organization'); +/* ------------------------------ */ + CREATE TABLE accounts( id VARCHAR(100) NOT NULL PRIMARY KEY, account_name VARCHAR(100) NOT NULL, @@ -15,8 +19,9 @@ CREATE TABLE accounts( CREATE UNIQUE INDEX idx_accounts_name ON accounts(account_name); CREATE UNIQUE INDEX idx_accounts_email ON accounts(email); -CREATE UNIQUE INDEX idx_provider_identity_key ON accounts(provider_identity_key); +CREATE UNIQUE INDEX idx_accounts_provider_identity_key ON accounts(provider_identity_key); +/* ------------------------------ */ CREATE TABLE accounts_passwords( account_name VARCHAR(100) NOT NULL, @@ -24,3 +29,5 @@ CREATE TABLE accounts_passwords( ); CREATE UNIQUE INDEX idx_accounts_passwords_account_name ON accounts_passwords(account_name); + +/* ------------------------------ */ diff --git a/migrations/postgres/20240328152300_persistent_tasks.sql b/migrations/postgres/20240328152300_persistent_tasks.sql new file mode 100644 index 000000000..db665a919 --- /dev/null +++ b/migrations/postgres/20240328152300_persistent_tasks.sql @@ -0,0 +1,34 @@ +/* ------------------------------ */ + +CREATE SEQUENCE task_event_id_seq AS BIGINT; +CREATE SEQUENCE task_id_seq AS BIGINT; + +/* ------------------------------ */ + +CREATE TYPE task_status_type AS ENUM ('queued', 'running', 'finished'); + +/* ------------------------------ */ + +CREATE TABLE tasks +( + task_id BIGINT NOT NULL PRIMARY KEY, + dataset_id VARCHAR(100), + task_status task_status_type NOT NULL +); + +CREATE INDEX idx_tasks_dataset_id ON tasks (dataset_id) WHERE dataset_id IS NOT NULL; +CREATE INDEX idx_tasks_task_status ON tasks(task_status) WHERE task_status != 'finished'; + +/* ------------------------------ */ + +CREATE TABLE task_events ( + event_id BIGINT PRIMARY KEY DEFAULT NEXTVAL('task_event_id_seq'), + task_id BIGINT NOT NULL REFERENCES tasks(task_id), + event_time timestamptz NOT NULL, + event_type VARCHAR(50) NOT NULL, + event_payload JSONB NOT NULL +); + +CREATE INDEX idx_task_events_task_id ON task_events (task_id); + +/* ------------------------------ */ diff --git a/migrations/postgres/20240328152300_task_events_table.sql b/migrations/postgres/20240328152300_task_events_table.sql deleted file mode 100644 index 3d2f1b1f6..000000000 --- a/migrations/postgres/20240328152300_task_events_table.sql +++ /dev/null @@ -1,14 +0,0 @@ -CREATE SEQUENCE task_id_seq AS BIGINT; - -CREATE TABLE task_events ( - event_id BIGSERIAL NOT NULL, - PRIMARY KEY (event_id), - task_id BIGINT NOT NULL, - dataset_id VARCHAR(100), - event_time timestamptz NOT NULL, - event_type VARCHAR(50) NOT NULL, - event_payload JSONB NOT NULL -); - -CREATE INDEX task_events_task_id_idx ON task_events (task_id); -CREATE INDEX task_events_dataset_id_idx On task_events(dataset_id); diff --git a/migrations/postgres/20240422100311_flow_configuration_events.sql b/migrations/postgres/20240422100311_flow_configuration_events.sql index 9b2914b61..4b6f19044 100644 --- a/migrations/postgres/20240422100311_flow_configuration_events.sql +++ b/migrations/postgres/20240422100311_flow_configuration_events.sql @@ -1,5 +1,9 @@ +/* ------------------------------ */ + CREATE SEQUENCE flow_configuration_event_id_seq AS BIGINT; +/* ------------------------------ */ + -- Add a value to 
enum: -- ALTER TYPE system_flow_type ADD VALUE 'new_value'; -- @@ -7,30 +11,32 @@ CREATE SEQUENCE flow_configuration_event_id_seq AS BIGINT; -- ALTER TYPE system_flow_type RENAME VALUE 'existing_value' TO 'new_value'; CREATE TYPE system_flow_type AS ENUM ('gc'); -CREATE TABLE system_flow_configuration_events -( - event_id BIGINT PRIMARY KEY DEFAULT NEXTVAL('flow_configuration_event_id_seq'), - system_flow_type system_flow_type NOT NULL, - event_type VARCHAR(50) NOT NULL, - event_time TIMESTAMPTZ NOT NULL, - event_payload JSONB NOT NULL -); - -- Add a value to enum: -- ALTER TYPE dataset_flow_type ADD VALUE 'new_value'; -- -- Rename an enum value: -- ALTER TYPE dataset_flow_type RENAME VALUE 'existing_value' TO 'new_value'; -CREATE TYPE dataset_flow_type AS ENUM ('ingest', 'execute_transform', 'hard_compaction'); +CREATE TYPE dataset_flow_type AS ENUM ('ingest', 'execute_transform', 'hard_compaction', 'reset'); + +/* ------------------------------ */ -CREATE TABLE dataset_flow_configuration_events +CREATE TABLE flow_configuration_events ( event_id BIGINT PRIMARY KEY DEFAULT NEXTVAL('flow_configuration_event_id_seq'), - dataset_id VARCHAR(100) NOT NULL, - dataset_flow_type dataset_flow_type NOT NULL, + dataset_id VARCHAR(100), + dataset_flow_type dataset_flow_type, + system_flow_type system_flow_type, event_type VARCHAR(50) NOT NULL, event_time TIMESTAMPTZ NOT NULL, event_payload JSONB NOT NULL ); -CREATE INDEX dataset_flow_configuration_events_dataset_id_idx ON dataset_flow_configuration_events (dataset_id, dataset_flow_type); +CREATE INDEX idx_flow_configuration_events_dataset_flow_key + ON flow_configuration_events (dataset_id, dataset_flow_type) + WHERE dataset_id IS NOT NULL; + +CREATE INDEX idx_flow_configuration_events_system_flow_key + ON flow_configuration_events (system_flow_type) + WHERE system_flow_type IS NOT NULL; + +/* ------------------------------ */ diff --git a/migrations/postgres/20240603102429_access_tokens_table.sql b/migrations/postgres/20240603102429_access_tokens_table.sql index 0c014c25a..89717f7da 100644 --- a/migrations/postgres/20240603102429_access_tokens_table.sql +++ b/migrations/postgres/20240603102429_access_tokens_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE access_tokens( id uuid NOT NULL PRIMARY KEY, token_name VARCHAR(100) NOT NULL, @@ -8,4 +10,8 @@ CREATE TABLE access_tokens( account_id VARCHAR(100) NOT NULL REFERENCES accounts(id) ); -CREATE UNIQUE INDEX idx_account_token_name ON access_tokens(account_id, token_name); \ No newline at end of file +CREATE UNIQUE INDEX idx_access_tokens_account_id_token_name + ON access_tokens(account_id, token_name) + WHERE revoked_at IS NULL; + +/* ------------------------------ */ diff --git a/migrations/postgres/20240701092841_dataset_env_vars_table.sql b/migrations/postgres/20240701092841_dataset_env_vars_table.sql index 88c367348..a2fba3bc6 100644 --- a/migrations/postgres/20240701092841_dataset_env_vars_table.sql +++ b/migrations/postgres/20240701092841_dataset_env_vars_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE dataset_env_vars( id UUID PRIMARY KEY, key VARCHAR(200) NOT NULL, @@ -7,6 +9,8 @@ CREATE TABLE dataset_env_vars( dataset_id VARCHAR(100) NOT NULL ); -CREATE UNIQUE INDEX idx_env_key_dataset ON dataset_env_vars(dataset_id, key); +CREATE UNIQUE INDEX idx_dataset_env_vars_dataset_id_key ON dataset_env_vars(dataset_id, key); + +CREATE INDEX idx_dataset_env_vars_dataset_id ON dataset_env_vars(dataset_id); -CREATE INDEX 
dataset_env_var_dataset_id_idx ON dataset_env_vars(dataset_id); \ No newline at end of file +/* ------------------------------ */ diff --git a/migrations/postgres/20240710191232_outbox_messages_consumptions.sql b/migrations/postgres/20240710191232_outbox_messages_consumptions.sql index 80f218f8d..105d5fafb 100644 --- a/migrations/postgres/20240710191232_outbox_messages_consumptions.sql +++ b/migrations/postgres/20240710191232_outbox_messages_consumptions.sql @@ -1,5 +1,9 @@ +/* ------------------------------ */ + CREATE SEQUENCE outbox_message_id_seq AS BIGINT; +/* ------------------------------ */ + CREATE TABLE outbox_messages( message_id BIGINT PRIMARY KEY DEFAULT NEXTVAL('outbox_message_id_seq'), producer_name VARCHAR(200) NOT NULL, @@ -7,7 +11,9 @@ CREATE TABLE outbox_messages( occurred_on timestamptz NOT NULL ); -CREATE INDEX outbox_messages_producer_name_idx ON outbox_messages(producer_name); +CREATE INDEX idx_outbox_messages_producer_name ON outbox_messages(producer_name); + +/* ------------------------------ */ CREATE TABLE outbox_message_consumptions( consumer_name VARCHAR(200) NOT NULL, @@ -15,3 +21,5 @@ CREATE TABLE outbox_message_consumptions( last_consumed_message_id BIGINT NOT NULL, PRIMARY KEY (consumer_name, producer_name) ); + +/* ------------------------------ */ diff --git a/migrations/postgres/20240816113232_add_reset_type.sql b/migrations/postgres/20240816113232_add_reset_type.sql deleted file mode 100644 index d2de870b1..000000000 --- a/migrations/postgres/20240816113232_add_reset_type.sql +++ /dev/null @@ -1 +0,0 @@ -ALTER TYPE dataset_flow_type ADD VALUE 'reset'; \ No newline at end of file diff --git a/migrations/postgres/20240819135229_modify_access_token_uniq.sql b/migrations/postgres/20240819135229_modify_access_token_uniq.sql deleted file mode 100644 index f6baf1d14..000000000 --- a/migrations/postgres/20240819135229_modify_access_token_uniq.sql +++ /dev/null @@ -1,5 +0,0 @@ -DROP INDEX idx_account_token_name; - -CREATE UNIQUE INDEX idx_account_token_name -ON access_tokens (account_id, token_name) -WHERE revoked_at IS NULL; diff --git a/migrations/postgres/20240823075029_persistent_flows.sql b/migrations/postgres/20240823075029_persistent_flows.sql new file mode 100644 index 000000000..dda435e7f --- /dev/null +++ b/migrations/postgres/20240823075029_persistent_flows.sql @@ -0,0 +1,39 @@ +/* ------------------------------ */ + +CREATE SEQUENCE flow_event_id_seq AS BIGINT; +CREATE SEQUENCE flow_id_seq AS BIGINT; + +/* ------------------------------ */ + +CREATE TYPE flow_status_type AS ENUM ('waiting', 'running', 'finished'); + +/* ------------------------------ */ + +CREATE TABLE flows +( + flow_id BIGINT NOT NULL PRIMARY KEY, + dataset_id VARCHAR(100), + dataset_flow_type dataset_flow_type, + system_flow_type system_flow_type, + initiator VARCHAR(100) NOT NULL, /* No referential integrity with account_id, as it can be a system initiator value */ + flow_status flow_status_type NOT NULL +); + +CREATE INDEX idx_flows_dataset_id ON flows (dataset_id) WHERE dataset_id IS NOT NULL; +CREATE INDEX idx_flows_system_flow_type ON flows (system_flow_type) WHERE system_flow_type IS NOT NULL; +CREATE INDEX idx_flows_flow_status ON flows(flow_status) WHERE flow_status != 'finished'; + +/* ------------------------------ */ + +CREATE TABLE flow_events +( + event_id BIGINT PRIMARY KEY DEFAULT NEXTVAL('flow_event_id_seq'), + flow_id BIGINT NOT NULL REFERENCES flows(flow_id), + event_type VARCHAR(50) NOT NULL, + event_time TIMESTAMPTZ NOT NULL, + event_payload JSONB NOT NULL +); + 
+CREATE INDEX idx_flow_events_flow_id ON flow_events (flow_id); + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240314140913_accounts_table.sql b/migrations/sqlite/20240314140913_accounts_table.sql index ad5a18d04..976f8d71e 100644 --- a/migrations/sqlite/20240314140913_accounts_table.sql +++ b/migrations/sqlite/20240314140913_accounts_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE accounts ( id VARCHAR(100) NOT NULL PRIMARY KEY, @@ -14,8 +16,9 @@ CREATE TABLE accounts CREATE UNIQUE INDEX idx_accounts_name ON accounts(account_name); CREATE UNIQUE INDEX idx_accounts_email ON accounts(email); -CREATE UNIQUE INDEX idx_provider_identity_key ON accounts(provider_identity_key); +CREATE UNIQUE INDEX idx_accounts_provider_identity_key ON accounts(provider_identity_key); +/* ------------------------------ */ CREATE TABLE accounts_passwords( account_name VARCHAR(100) NOT NULL, @@ -23,3 +26,5 @@ CREATE TABLE accounts_passwords( ); CREATE UNIQUE INDEX idx_accounts_passwords_account_name ON accounts_passwords(account_name); + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240328152300_persistent_tasks.sql b/migrations/sqlite/20240328152300_persistent_tasks.sql new file mode 100644 index 000000000..71d94e939 --- /dev/null +++ b/migrations/sqlite/20240328152300_persistent_tasks.sql @@ -0,0 +1,38 @@ +/* ------------------------------ */ + +CREATE TABLE task_ids ( + task_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + created_time timestamptz NOT NULL +); + +/* ------------------------------ */ + +CREATE TABLE tasks +( + task_id BIGINT NOT NULL PRIMARY KEY REFERENCES task_ids(task_id), + dataset_id VARCHAR(100), + task_status VARCHAR(10) CHECK ( + task_status IN ( + 'queued', + 'running', + 'finished' + ) + ) NOT NULL +); + +CREATE INDEX idx_tasks_dataset_id ON tasks (dataset_id) WHERE dataset_id IS NOT NULL; +CREATE INDEX idx_tasks_task_status ON tasks (task_status) WHERE task_status != 'finished'; + +/* ------------------------------ */ + +CREATE TABLE task_events ( + event_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + task_id BIGINT NOT NULL REFERENCES tasks(task_id), + event_time timestamptz NOT NULL, + event_type VARCHAR(50) NOT NULL, + event_payload JSONB NOT NULL +); + +CREATE INDEX idx_task_events_task_id ON task_events (task_id); + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240328152300_task_events_table.sql b/migrations/sqlite/20240328152300_task_events_table.sql deleted file mode 100644 index ee4506ef5..000000000 --- a/migrations/sqlite/20240328152300_task_events_table.sql +++ /dev/null @@ -1,16 +0,0 @@ -CREATE TABLE tasks ( - task_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, - created_time timestamptz NOT NULL -); - -CREATE TABLE task_events ( - event_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, - task_id BIGINT NOT NULL, - dataset_id VARCHAR(100), - event_time timestamptz NOT NULL, - event_type VARCHAR(50) NOT NULL, - event_payload JSONB NOT NULL -); - -CREATE INDEX task_events_task_id_idx ON task_events (task_id); -CREATE INDEX task_events_dataset_id_idx On task_events(dataset_id); \ No newline at end of file diff --git a/migrations/sqlite/20240422100311_flow_configuration_events.sql b/migrations/sqlite/20240422100311_flow_configuration_events.sql index 26e987489..2332b27f4 100644 --- a/migrations/sqlite/20240422100311_flow_configuration_events.sql +++ b/migrations/sqlite/20240422100311_flow_configuration_events.sql @@ -1,26 +1,33 @@ -CREATE TABLE flow_configuration_event -( - 
event_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, - created_time timestamptz NOT NULL -); +/* ------------------------------ */ -CREATE TABLE system_flow_configuration_events +CREATE TABLE flow_configuration_events ( - event_id INTEGER PRIMARY KEY NOT NULL, - system_flow_type VARCHAR(10) CHECK ( system_flow_type IN ('gc') ) NOT NULL, - event_type VARCHAR(50) NOT NULL, - event_time TIMESTAMPTZ NOT NULL, - event_payload JSONB NOT NULL + event_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + created_time timestamptz NOT NULL, + dataset_id VARCHAR(100), + dataset_flow_type VARCHAR(20) CHECK ( + dataset_flow_type IN ( + 'ingest', + 'execute_transform', + 'hard_compaction', + 'reset' + ) + ), + system_flow_type VARCHAR(10) CHECK ( + system_flow_type IN ('gc') + ), + event_type VARCHAR(50) NOT NULL, + event_time TIMESTAMPTZ NOT NULL, + event_payload JSONB NOT NULL ); -CREATE TABLE dataset_flow_configuration_events -( - event_id INTEGER PRIMARY KEY NOT NULL, - dataset_id VARCHAR(100) NOT NULL, - dataset_flow_type VARCHAR(20) CHECK ( dataset_flow_type IN ('ingest', 'execute_transform', 'hard_compaction', 'reset') ) NOT NULL, - event_type VARCHAR(50) NOT NULL, - event_time TIMESTAMPTZ NOT NULL, - event_payload JSONB NOT NULL -); -CREATE INDEX dataset_flow_configuration_events_dataset_id_idx ON dataset_flow_configuration_events (dataset_id, dataset_flow_type); +CREATE INDEX idx_flow_configuration_events_dataset_id_idx + ON flow_configuration_events (dataset_id, dataset_flow_type) + WHERE dataset_id IS NOT NULL; + +CREATE INDEX idx_flow_configuration_events_system_flow_type_idx + ON flow_configuration_events (system_flow_type) + WHERE system_flow_type IS NOT NULL; + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240603102429_access_tokens_table.sql b/migrations/sqlite/20240603102429_access_tokens_table.sql index 434e33ee4..16ef019c9 100644 --- a/migrations/sqlite/20240603102429_access_tokens_table.sql +++ b/migrations/sqlite/20240603102429_access_tokens_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE access_tokens( id VARCHAR(36) NOT NULL PRIMARY KEY, token_name VARCHAR(100) NOT NULL, @@ -8,4 +10,8 @@ CREATE TABLE access_tokens( account_id VARCHAR(100) NOT NULL REFERENCES accounts(id) ); -CREATE UNIQUE INDEX idx_account_token_name ON access_tokens(account_id, token_name); \ No newline at end of file +CREATE UNIQUE INDEX idx_access_tokens_account_token_name + ON access_tokens(account_id, token_name) + WHERE revoked_at IS NULL; + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240701092841_dataset_env_vars_table.sql b/migrations/sqlite/20240701092841_dataset_env_vars_table.sql index d7066124c..685427477 100644 --- a/migrations/sqlite/20240701092841_dataset_env_vars_table.sql +++ b/migrations/sqlite/20240701092841_dataset_env_vars_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE dataset_env_vars( id VARCHAR(36) NOT NULL PRIMARY KEY, key VARCHAR(200) NOT NULL, @@ -7,6 +9,8 @@ CREATE TABLE dataset_env_vars( dataset_id VARCHAR(100) NOT NULL ); -CREATE UNIQUE INDEX idx_env_key_dataset ON dataset_env_vars(dataset_id, key); +CREATE UNIQUE INDEX idx_dataset_env_vars_key_dataset ON dataset_env_vars(dataset_id, key); + +CREATE INDEX idx_dataset_env_vars_dataset_id ON dataset_env_vars(dataset_id); -CREATE INDEX dataset_env_var_dataset_id_idx ON dataset_env_vars(dataset_id); \ No newline at end of file +/* ------------------------------ */ diff --git 
a/migrations/sqlite/20240710205713_outbox_messages_consumptions.sql b/migrations/sqlite/20240710205713_outbox_messages_consumptions.sql index 8ad974746..9d2e3a253 100644 --- a/migrations/sqlite/20240710205713_outbox_messages_consumptions.sql +++ b/migrations/sqlite/20240710205713_outbox_messages_consumptions.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE outbox_messages( message_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, producer_name VARCHAR(200) NOT NULL, @@ -5,7 +7,9 @@ CREATE TABLE outbox_messages( occurred_on timestamptz NOT NULL ); -CREATE INDEX outbox_messages_producer_name_idx ON outbox_messages(producer_name); +CREATE INDEX idx_outbox_messages_producer_name ON outbox_messages(producer_name); + +/* ------------------------------ */ CREATE TABLE outbox_message_consumptions( consumer_name VARCHAR(200) NOT NULL, @@ -14,3 +18,4 @@ CREATE TABLE outbox_message_consumptions( PRIMARY KEY(consumer_name, producer_name) ); +/* ------------------------------ */ diff --git a/migrations/sqlite/20240813153516_dataset_entries_table.sql b/migrations/sqlite/20240813153516_dataset_entries_table.sql index dd980caaf..9ec5130bf 100644 --- a/migrations/sqlite/20240813153516_dataset_entries_table.sql +++ b/migrations/sqlite/20240813153516_dataset_entries_table.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE dataset_entries ( dataset_id VARCHAR(100) NOT NULL PRIMARY KEY, @@ -6,8 +8,10 @@ CREATE TABLE dataset_entries created_at timestamptz NOT NULL ); -CREATE INDEX idx_owner_id +CREATE INDEX idx_dataset_entries_owner_id ON dataset_entries (owner_id); -CREATE UNIQUE INDEX idx_uniq_owner_id_dataset_name +CREATE UNIQUE INDEX idx_dataset_entries_owner_id_dataset_name ON dataset_entries (owner_id, dataset_name); + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240814131142_auth_rebac_tables.sql b/migrations/sqlite/20240814131142_auth_rebac_tables.sql index 32c908e42..85e2eea33 100644 --- a/migrations/sqlite/20240814131142_auth_rebac_tables.sql +++ b/migrations/sqlite/20240814131142_auth_rebac_tables.sql @@ -1,3 +1,5 @@ +/* ------------------------------ */ + CREATE TABLE auth_rebac_properties ( entity_type VARCHAR(25) NOT NULL, @@ -6,13 +8,13 @@ CREATE TABLE auth_rebac_properties property_value VARCHAR(50) NOT NULL ); -CREATE INDEX idx_entity +CREATE INDEX idx_auth_rebac_properties_entity ON auth_rebac_properties (entity_type, entity_id); -CREATE UNIQUE INDEX idx_uniq_entity_property_name +CREATE UNIQUE INDEX idx_auth_rebac_properties_entity_property_name ON auth_rebac_properties (entity_type, entity_id, property_name); ------------------------------------------------------------------------------------------------------------------------- +/* ------------------------------ */ CREATE TABLE auth_rebac_relations ( @@ -23,6 +25,9 @@ CREATE TABLE auth_rebac_relations object_entity_id VARCHAR(100) NOT NULL ); -CREATE UNIQUE INDEX idx_uniq_row - ON auth_rebac_relations (subject_entity_type, subject_entity_id, relationship, object_entity_type, - object_entity_id); +CREATE UNIQUE INDEX idx_auth_rebac_relations_row + ON auth_rebac_relations ( + subject_entity_type, subject_entity_id, relationship, object_entity_type, object_entity_id + ); + +/* ------------------------------ */ diff --git a/migrations/sqlite/20240819135229_modify_access_token_uniq.sql b/migrations/sqlite/20240819135229_modify_access_token_uniq.sql deleted file mode 100644 index df375a507..000000000 --- a/migrations/sqlite/20240819135229_modify_access_token_uniq.sql +++ 
/dev/null @@ -1,5 +0,0 @@ -DROP INDEX IF EXISTS idx_account_token_name; - -CREATE UNIQUE INDEX idx_account_token_name -ON access_tokens (account_id, token_name) -WHERE revoked_at IS NULL; diff --git a/migrations/sqlite/20240823083159_persistent_flows.sql b/migrations/sqlite/20240823083159_persistent_flows.sql new file mode 100644 index 000000000..46cc0c632 --- /dev/null +++ b/migrations/sqlite/20240823083159_persistent_flows.sql @@ -0,0 +1,53 @@ +/* ------------------------------ */ + +CREATE TABLE flow_ids +( + flow_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + created_time timestamptz NOT NULL +); + +/* ------------------------------ */ + +CREATE TABLE flows +( + flow_id BIGINT NOT NULL PRIMARY KEY REFERENCES flow_ids(flow_id), + dataset_id VARCHAR(100), + dataset_flow_type VARCHAR(20) CHECK ( + dataset_flow_type IN ( + 'ingest', + 'execute_transform', + 'hard_compaction', + 'reset' + ) + ), + system_flow_type VARCHAR(10) CHECK ( + system_flow_type IN ('gc') + ), + initiator VARCHAR(100) NOT NULL, /* No referential integrity with account_id, as it can be a system initiator value */ + flow_status VARCHAR(10) CHECK ( + flow_status IN ( + 'waiting', + 'running', + 'finished' + ) + ) NOT NULL +); + +CREATE INDEX idx_flows_dataset_id ON flows (dataset_id) WHERE dataset_id IS NOT NULL; +CREATE INDEX idx_flows_system_flow_type ON flows (system_flow_type) WHERE system_flow_type IS NOT NULL; +CREATE INDEX idx_flows_flow_status ON flows(flow_status) WHERE flow_status != 'finished'; + +/* ------------------------------ */ + +CREATE TABLE flow_events +( + event_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + flow_id BIGINT NOT NULL REFERENCES flows(flow_id), + event_type VARCHAR(50) NOT NULL, + event_time TIMESTAMPTZ NOT NULL, + event_payload JSONB NOT NULL +); + +CREATE INDEX idx_flow_events_flow_id ON flow_events (flow_id); + +/* ------------------------------ */ diff --git a/resources/schema.gql b/resources/schema.gql index 390152a58..58b51d903 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -1402,13 +1402,6 @@ type Mutation { system. This groups deals with their identities and permissions. """ accounts: AccountsMut! - """ - Tasks-related functionality group. - - Tasks are units of work scheduled and executed by the system to query - and process data. - """ - tasks: TasksMut! } type NoChanges implements CommitResult & UpdateReadmeResult { @@ -1489,14 +1482,6 @@ type Query { """ accounts: Accounts! """ - Task-related functionality group. - - Tasks are units of scheduling that can perform many functions like - ingesting new data, running dataset transformations, answering ad-hoc - queries etc. - """ - tasks: Tasks! - """ Search-related functionality group """ search: Search! @@ -1813,26 +1798,6 @@ type Task { finishedAt: DateTime } -type TaskConnection { - """ - A shorthand for `edges { node { ... } }` - """ - nodes: [Task!]! - """ - Approximate number of total nodes - """ - totalCount: Int! - """ - Page information - """ - pageInfo: PageBasedInfo! - edges: [TaskEdge!]! -} - -type TaskEdge { - node: Task! -} - scalar TaskID """ @@ -1871,30 +1836,6 @@ enum TaskStatus { FINISHED } -type Tasks { - """ - Returns current state of a given task - """ - getTask(taskId: TaskID!): Task - """ - Returns states of tasks associated with a given dataset ordered by - creation time from newest to oldest - """ - listTasksByDataset(datasetId: DatasetID!, page: Int, perPage: Int): TaskConnection! 
-} - -type TasksMut { - """ - Requests cancellation of the specified task - """ - cancelTask(taskId: TaskID!): Task! - """ - Schedules a task to update the specified dataset by performing polling - ingest or a derivative transformation - """ - createProbeTask(datasetId: DatasetID, busyTimeMs: Int, endWithOutcome: TaskOutcome): Task! -} - type TemporalTable { name: String! primaryKey: [String!]! diff --git a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs index 5763fccd6..c87fe196e 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs @@ -71,7 +71,7 @@ impl DatasetFlowRunsMut { // TODO: for some datasets launching manually might not be an option: // i.e., root datasets with push sources require input data to arrive - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); let logged_account = utils::get_logged_account(ctx); let flow_run_snapshot = match FlowRunConfiguration::try_into_snapshot( @@ -86,7 +86,7 @@ impl DatasetFlowRunsMut { Err(err) => return Ok(TriggerFlowResult::InvalidRunConfigurations(err)), }; - let flow_state = flow_service + let flow_state = flow_query_service .trigger_manual_flow( Utc::now(), fs::FlowKeyDataset::new(self.dataset_handle.id.clone(), dataset_flow_type.into()) @@ -121,8 +121,8 @@ impl DatasetFlowRunsMut { } // Attempt cancelling scheduled tasks - let flow_service = from_catalog::(ctx).unwrap(); - let flow_state = flow_service + let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_state = flow_query_service .cancel_scheduled_tasks(flow_id.into()) .await .map_err(|e| match e { diff --git a/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs b/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs index 37085957a..5848ea1f0 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs @@ -37,9 +37,9 @@ pub(crate) async fn check_if_flow_belongs_to_dataset( flow_id: FlowID, dataset_handle: &odf::DatasetHandle, ) -> Result> { - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); - match flow_service.get_flow(flow_id.into()).await { + match flow_query_service.get_flow(flow_id.into()).await { Ok(flow_state) => match flow_state.flow_key { fs::FlowKey::Dataset(fk_dataset) => { if fk_dataset.dataset_id != dataset_handle.id { diff --git a/src/adapter/graphql/src/mutations/mod.rs b/src/adapter/graphql/src/mutations/mod.rs index dbca07c0b..b3f8cdd6a 100644 --- a/src/adapter/graphql/src/mutations/mod.rs +++ b/src/adapter/graphql/src/mutations/mod.rs @@ -17,7 +17,6 @@ mod dataset_mut; mod datasets_mut; mod flows_mut; mod metadata_chain_mut; -mod tasks_mut; pub(crate) use account_mut::*; pub(crate) use accounts_mut::*; @@ -28,4 +27,3 @@ pub(crate) use dataset_mut::*; pub(crate) use datasets_mut::*; pub(crate) use flows_mut::*; pub(crate) use metadata_chain_mut::*; -pub(crate) use tasks_mut::*; diff --git a/src/adapter/graphql/src/mutations/tasks_mut.rs b/src/adapter/graphql/src/mutations/tasks_mut.rs deleted file mode 100644 index 6e470be85..000000000 --- a/src/adapter/graphql/src/mutations/tasks_mut.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
-// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use kamu_task_system as ts; - -use crate::prelude::*; -use crate::queries::Task; -use crate::LoggedInGuard; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct TasksMut; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -// TODO: Error handling -#[Object] -impl TasksMut { - /// Requests cancellation of the specified task - #[graphql(guard = "LoggedInGuard::new()")] - async fn cancel_task(&self, ctx: &Context<'_>, task_id: TaskID) -> Result { - let task_sched = from_catalog::(ctx).unwrap(); - let task_state = task_sched.cancel_task(task_id.into()).await.int_err()?; - Ok(Task::new(task_state)) - } - - /// Schedules a task to update the specified dataset by performing polling - /// ingest or a derivative transformation - #[graphql(guard = "LoggedInGuard::new()")] - async fn create_probe_task( - &self, - ctx: &Context<'_>, - dataset_id: Option, - busy_time_ms: Option, - end_with_outcome: Option, - ) -> Result { - let task_sched = from_catalog::(ctx).unwrap(); - let task_state = task_sched - .create_task(ts::LogicalPlan::Probe(ts::Probe { - dataset_id: dataset_id.map(Into::into), - busy_time: busy_time_ms.map(std::time::Duration::from_millis), - end_with_outcome: end_with_outcome.map(|o| match o { - TaskOutcome::Success => ts::TaskOutcome::Success(ts::TaskResult::Empty), - TaskOutcome::Failed => ts::TaskOutcome::Failed(ts::TaskError::Empty), - TaskOutcome::Cancelled => ts::TaskOutcome::Cancelled, - }), - })) - .await - .int_err()?; - Ok(Task::new(task_state)) - } -} diff --git a/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs b/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs index 7164e2b25..87d32cfe0 100644 --- a/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs +++ b/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs @@ -9,6 +9,7 @@ use std::collections::HashSet; +use database_common::PaginationOpts; use futures::TryStreamExt; use kamu::utils::datasets_filtering::filter_datasets_by_local_pattern; use kamu_accounts::Account as AccountEntity; @@ -40,7 +41,7 @@ impl AccountFlowRuns { per_page: Option, filters: Option, ) -> Result { - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); @@ -71,11 +72,11 @@ impl AccountFlowRuns { None => None, }; - let flows_state_listing = flow_service + let flows_state_listing = flow_query_service .list_all_flows_by_account( &self.account.id, filters.unwrap_or_default(), - fs::FlowPaginationOpts { + PaginationOpts { offset: page * per_page, limit: per_page, }, @@ -99,9 +100,9 @@ impl AccountFlowRuns { } async fn list_datasets_with_flow(&self, ctx: &Context<'_>) -> Result { - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); - let datasets_with_flows: Vec<_> = flow_service + let datasets_with_flows: Vec<_> = flow_query_service .list_all_datasets_with_flow_by_account(&self.account.id) .await .int_err()? 
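The GraphQL hunks above and below all make the same substitution: resolvers drop the removed fs::FlowPaginationOpts / DatabasePaginationOpts types in favour of the shared database_common::PaginationOpts, and page/per_page arguments become usize so the offset math no longer needs try_from conversions. A minimal stand-alone sketch of that arithmetic follows; only the offset/limit fields and the default page size of 15 are taken from these hunks, while the local struct and helper name are illustrative stand-ins rather than code from this change set.

// Stand-in mirroring the `offset`/`limit` fields of `database_common::PaginationOpts`
// as they appear in the resolvers above (illustrative only).
struct PaginationOpts {
    offset: usize,
    limit: usize,
}

// Hypothetical helper: the same defaulting and `page * per_page` offset
// computation the resolvers perform inline.
fn page_to_pagination(
    page: Option<usize>,
    per_page: Option<usize>,
    default_per_page: usize,
) -> PaginationOpts {
    let page = page.unwrap_or(0);
    let per_page = per_page.unwrap_or(default_per_page);
    PaginationOpts {
        offset: page * per_page,
        limit: per_page,
    }
}

fn main() {
    // Third page (zero-based page = 2) with the DEFAULT_PER_PAGE of 15 used above.
    let opts = page_to_pagination(Some(2), None, 15);
    assert_eq!((opts.offset, opts.limit), (30, 15));
}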
diff --git a/src/adapter/graphql/src/queries/auth.rs b/src/adapter/graphql/src/queries/auth.rs index c6443de23..2297aa5e6 100644 --- a/src/adapter/graphql/src/queries/auth.rs +++ b/src/adapter/graphql/src/queries/auth.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use super::ViewAccessToken; use crate::prelude::*; @@ -19,7 +19,7 @@ pub struct Auth; #[Object] impl Auth { - const DEFAULT_PER_PAGE: i64 = 15; + const DEFAULT_PER_PAGE: usize = 15; #[allow(clippy::unused_async)] async fn enabled_login_methods(&self, ctx: &Context<'_>) -> Result> { @@ -33,8 +33,8 @@ impl Auth { &self, ctx: &Context<'_>, account_id: AccountID, - page: Option, - per_page: Option, + page: Option, + per_page: Option, ) -> Result { check_logged_account_id_match(ctx, &account_id)?; @@ -47,7 +47,7 @@ impl Auth { let access_token_listing = access_token_service .get_access_tokens_by_account_id( &account_id, - &DatabasePaginationOpts { + &PaginationOpts { offset: page * per_page, limit: per_page, }, @@ -63,8 +63,8 @@ impl Auth { Ok(AccessTokenConnection::new( access_tokens, - usize::try_from(page).unwrap(), - usize::try_from(per_page).unwrap(), + page, + per_page, access_token_listing.total_count, )) } diff --git a/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs b/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs index 8d2d559d7..d966fb3c3 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
-use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use kamu_datasets::{DatasetEnvVarService, GetDatasetEnvVarError}; use opendatafabric as odf; @@ -23,7 +23,7 @@ pub struct DatasetEnvVars { #[Object] impl DatasetEnvVars { - const DEFAULT_PER_PAGE: i64 = 15; + const DEFAULT_PER_PAGE: usize = 15; #[graphql(skip)] pub fn new(dataset_handle: odf::DatasetHandle) -> Self { @@ -54,8 +54,8 @@ impl DatasetEnvVars { async fn list_env_variables( &self, ctx: &Context<'_>, - page: Option, - per_page: Option, + page: Option, + per_page: Option, ) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; @@ -66,7 +66,7 @@ impl DatasetEnvVars { let dataset_env_var_listing = dataset_env_var_service .get_all_dataset_env_vars_by_dataset_id( &self.dataset_handle.id, - Some(DatabasePaginationOpts { + Some(PaginationOpts { offset: (page * per_page), limit: per_page, }), @@ -82,8 +82,8 @@ impl DatasetEnvVars { Ok(ViewDatasetEnvVarConnection::new( dataset_env_vars, - usize::try_from(page).unwrap(), - usize::try_from(per_page).unwrap(), + page, + per_page, dataset_env_var_listing.total_count, )) } diff --git a/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs b/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs index f02071bbe..0625c5f8c 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs @@ -9,6 +9,7 @@ use std::collections::HashSet; +use database_common::PaginationOpts; use futures::TryStreamExt; use kamu_accounts::AuthenticationService; use {kamu_flow_system as fs, opendatafabric as odf}; @@ -44,8 +45,12 @@ impl DatasetFlowRuns { }); } - let flow_service = from_catalog::(ctx).unwrap(); - let flow_state = flow_service.get_flow(flow_id.into()).await.int_err()?; + let flow_query_service = from_catalog::(ctx).unwrap(); + + let flow_state = flow_query_service + .get_flow(flow_id.into()) + .await + .int_err()?; Ok(GetFlowResult::Success(GetFlowSuccess { flow: Flow::new(flow_state), @@ -61,7 +66,7 @@ impl DatasetFlowRuns { ) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); @@ -89,11 +94,11 @@ impl DatasetFlowRuns { let filters = filters.unwrap_or_default(); - let flows_state_listing = flow_service + let flows_state_listing = flow_query_service .list_all_flows_by_dataset( &self.dataset_handle.id, filters, - fs::FlowPaginationOpts { + PaginationOpts { offset: page * per_page, limit: per_page, }, @@ -119,9 +124,9 @@ impl DatasetFlowRuns { async fn list_flow_initiators(&self, ctx: &Context<'_>) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog::(ctx).unwrap(); - let flow_initiator_ids: Vec<_> = flow_service + let flow_initiator_ids: Vec<_> = flow_query_service .list_all_flow_initiators_by_dataset(&self.dataset_handle.id) .await .int_err()? diff --git a/src/adapter/graphql/src/queries/flows/flow.rs b/src/adapter/graphql/src/queries/flows/flow.rs index 06751448b..3af8f8024 100644 --- a/src/adapter/graphql/src/queries/flows/flow.rs +++ b/src/adapter/graphql/src/queries/flows/flow.rs @@ -8,14 +8,14 @@ // by the Apache License, Version 2.0. 
use chrono::{DateTime, Utc}; -use futures::TryStreamExt; use kamu_core::{DatasetChangesService, PollingIngestService}; use kamu_flow_system::FlowResultDatasetUpdate; -use {kamu_flow_system as fs, kamu_task_system as ts, opendatafabric as odf}; +use {kamu_flow_system as fs, opendatafabric as odf}; use super::{FlowConfigurationSnapshot, FlowEvent, FlowOutcome, FlowStartCondition, FlowTrigger}; use crate::prelude::*; use crate::queries::{Account, Task}; +use crate::utils; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -154,11 +154,9 @@ impl Flow { /// Associated tasks async fn tasks(&self, ctx: &Context<'_>) -> Result> { - let task_scheduler = from_catalog::(ctx).unwrap(); - let mut tasks = Vec::new(); for task_id in &self.flow_state.task_ids { - let ts_task = task_scheduler.get_task(*task_id).await.int_err()?; + let ts_task = utils::get_task(ctx, *task_id).await?; tasks.push(Task::new(ts_task)); } Ok(tasks) @@ -168,9 +166,9 @@ impl Flow { async fn history(&self, ctx: &Context<'_>) -> Result> { let flow_event_store = from_catalog::(ctx).unwrap(); + use futures::TryStreamExt; let flow_events: Vec<_> = flow_event_store .get_events(&self.flow_state.flow_id, Default::default()) - .await .try_collect() .await .int_err()?; diff --git a/src/adapter/graphql/src/queries/tasks/mod.rs b/src/adapter/graphql/src/queries/tasks/mod.rs index 169f2bbdf..c21fe7878 100644 --- a/src/adapter/graphql/src/queries/tasks/mod.rs +++ b/src/adapter/graphql/src/queries/tasks/mod.rs @@ -8,7 +8,5 @@ // by the Apache License, Version 2.0. mod task; -mod tasks; pub(crate) use task::*; -pub(crate) use tasks::*; diff --git a/src/adapter/graphql/src/queries/tasks/task.rs b/src/adapter/graphql/src/queries/tasks/task.rs index 8ea296d53..d80497df6 100644 --- a/src/adapter/graphql/src/queries/tasks/task.rs +++ b/src/adapter/graphql/src/queries/tasks/task.rs @@ -33,7 +33,7 @@ impl Task { /// Life-cycle status of a task async fn status(&self) -> TaskStatus { - (&self.state.status).into() + (&self.state.status()).into() } /// Whether the task was ordered to be cancelled @@ -44,10 +44,7 @@ impl Task { /// Describes a certain final outcome of the task once it reaches the /// "finished" status async fn outcome(&self) -> Option { - match &self.state.status { - ts::TaskStatus::Queued | ts::TaskStatus::Running => None, - ts::TaskStatus::Finished(outcome) => Some(outcome.into()), - } + self.state.outcome.as_ref().map(Into::into) } /// Time when task was originally created and placed in a queue diff --git a/src/adapter/graphql/src/queries/tasks/tasks.rs b/src/adapter/graphql/src/queries/tasks/tasks.rs deleted file mode 100644 index aa94c5f8e..000000000 --- a/src/adapter/graphql/src/queries/tasks/tasks.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
- -use futures::TryStreamExt; -use kamu_task_system as ts; - -use super::Task; -use crate::prelude::*; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct Tasks; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[Object] -impl Tasks { - const DEFAULT_PER_PAGE: usize = 15; - - /// Returns current state of a given task - async fn get_task(&self, ctx: &Context<'_>, task_id: TaskID) -> Result> { - let task_sched = from_catalog::(ctx).unwrap(); - match task_sched.get_task(task_id.into()).await { - Ok(task_state) => Ok(Some(Task::new(task_state))), - Err(ts::GetTaskError::NotFound(_)) => Ok(None), - Err(err) => Err(err.int_err().into()), - } - } - - /// Returns states of tasks associated with a given dataset ordered by - /// creation time from newest to oldest - // TODO: reconsider performance impact - async fn list_tasks_by_dataset( - &self, - ctx: &Context<'_>, - dataset_id: DatasetID, - page: Option, - per_page: Option, - ) -> Result { - let task_sched = from_catalog::(ctx).unwrap(); - - let page = page.unwrap_or(0); - let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); - - let tasks_listing = task_sched - .list_tasks_by_dataset( - &dataset_id, - ts::TaskPaginationOpts { - offset: page * per_page, - limit: per_page, - }, - ) - .await - .int_err()?; - - let nodes: Vec<_> = tasks_listing.stream.map_ok(Task::new).try_collect().await?; - let total_count = tasks_listing.total_count; - - // TODO: We set total to len + 1 to indicate there is a next page. - // We should replace this with unbounded size connection. - Ok(TaskConnection::new(nodes, page, per_page, total_count)) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -page_based_connection!(Task, TaskConnection, TaskEdge); diff --git a/src/adapter/graphql/src/root.rs b/src/adapter/graphql/src/root.rs index d95685fd7..838f27537 100644 --- a/src/adapter/graphql/src/root.rs +++ b/src/adapter/graphql/src/root.rs @@ -46,15 +46,6 @@ impl Query { Accounts } - /// Task-related functionality group. - /// - /// Tasks are units of scheduling that can perform many functions like - /// ingesting new data, running dataset transformations, answering ad-hoc - /// queries etc. - async fn tasks(&self) -> Tasks { - Tasks - } - /// Search-related functionality group async fn search(&self) -> Search { Search @@ -99,14 +90,6 @@ impl Mutation { async fn accounts(&self) -> AccountsMut { AccountsMut } - - /// Tasks-related functionality group. - /// - /// Tasks are units of work scheduled and executed by the system to query - /// and process data. 
- async fn tasks(&self) -> TasksMut { - TasksMut - } } pub type Schema = async_graphql::Schema; diff --git a/src/adapter/graphql/src/scalars/task_status_outcome.rs b/src/adapter/graphql/src/scalars/task_status_outcome.rs index 85ea0d653..b059ac1b6 100644 --- a/src/adapter/graphql/src/scalars/task_status_outcome.rs +++ b/src/adapter/graphql/src/scalars/task_status_outcome.rs @@ -29,7 +29,7 @@ impl From<&ts::TaskStatus> for TaskStatus { match v { ts::TaskStatus::Queued => Self::Queued, ts::TaskStatus::Running => Self::Running, - ts::TaskStatus::Finished(_) => Self::Finished, + ts::TaskStatus::Finished => Self::Finished, } } } diff --git a/src/adapter/graphql/src/utils.rs b/src/adapter/graphql/src/utils.rs index 7023cdec6..6b389abfa 100644 --- a/src/adapter/graphql/src/utils.rs +++ b/src/adapter/graphql/src/utils.rs @@ -106,8 +106,11 @@ pub(crate) async fn get_task( ctx: &Context<'_>, task_id: ts::TaskID, ) -> Result { - let task_scheduler = from_catalog::(ctx).unwrap(); - task_scheduler.get_task(task_id).await.int_err() + let task_event_store = from_catalog::(ctx).unwrap(); + let task = ts::Task::load(task_id, task_event_store.as_ref()) + .await + .int_err()?; + Ok(task.into()) } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/tests/tests/mod.rs b/src/adapter/graphql/tests/tests/mod.rs index 2a6b39735..6b13cb57f 100644 --- a/src/adapter/graphql/tests/tests/mod.rs +++ b/src/adapter/graphql/tests/tests/mod.rs @@ -20,5 +20,4 @@ mod test_gql_metadata; mod test_gql_metadata_chain; mod test_gql_search; mod test_guards; -mod test_tasks; mod test_update_schema; diff --git a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs index 796f4c145..b83dc4c4b 100644 --- a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs @@ -34,10 +34,9 @@ use kamu_accounts::{JwtAuthenticationConfig, DEFAULT_ACCOUNT_NAME, DEFAULT_ACCOU use kamu_accounts_inmem::InMemoryAccessTokenRepository; use kamu_accounts_services::{AccessTokenServiceImpl, AuthenticationServiceImpl}; use kamu_core::*; -use kamu_flow_system::FlowServiceRunConfig; +use kamu_flow_system::FlowExecutorConfig; use kamu_flow_system_inmem::{InMemoryFlowConfigurationEventStore, InMemoryFlowEventStore}; -use kamu_flow_system_services::{FlowConfigurationServiceImpl, FlowServiceImpl}; -use kamu_task_system_inmem::InMemoryTaskSystemEventStore; +use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; use opendatafabric::{AccountName, DatasetAlias, DatasetID, DatasetKind, DatasetName}; @@ -678,16 +677,14 @@ impl FlowConfigHarness { .add::() .add_value(dependency_graph_mock) .bind::() - .add::() .add::() - .add::() .add::() - .add_value(FlowServiceRunConfig::new( + .add_value(FlowExecutorConfig::new( Duration::try_seconds(1).unwrap(), Duration::try_minutes(1).unwrap(), )) .add::() - .add::() + .add::() .add_value(transform_service_mock) .bind::() .add_value(polling_service_mock) @@ -697,6 +694,7 @@ impl FlowConfigHarness { .add::(); NoOpDatabasePlugin::init_database_components(&mut b); + kamu_flow_system_services::register_dependencies(&mut b); register_message_dispatcher::( &mut b, diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs 
b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs index 656e84581..e1c1b37b7 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs @@ -57,20 +57,17 @@ use kamu_flow_system::{ Flow, FlowConfigurationUpdatedMessage, FlowEventStore, + FlowExecutorConfig, + FlowExecutorTestDriver, FlowID, - FlowServiceRunConfig, - FlowServiceTestDriver, FlowTrigger, FlowTriggerAutoPolling, + METADATA_TASK_FLOW_ID, }; use kamu_flow_system_inmem::{InMemoryFlowConfigurationEventStore, InMemoryFlowEventStore}; -use kamu_flow_system_services::{ - FlowConfigurationServiceImpl, - FlowServiceImpl, - MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, -}; -use kamu_task_system::{self as ts}; -use kamu_task_system_inmem::InMemoryTaskSystemEventStore; +use kamu_flow_system_services::MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE; +use kamu_task_system::{self as ts, TaskMetadata}; +use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxExt, OutboxImmediateImpl}; use opendatafabric::{AccountID, DatasetID, DatasetKind, Multihash}; @@ -200,6 +197,7 @@ async fn test_trigger_ingest_root_dataset() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("0", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]); let response = schema .execute( @@ -274,7 +272,9 @@ async fn test_trigger_ingest_root_dataset() { let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) + .await; let response = schema .execute( @@ -349,6 +349,7 @@ async fn test_trigger_ingest_root_dataset() { harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, ts::TaskOutcome::Success(ts::TaskResult::UpdateDatasetResult( ts::TaskUpdateDatasetResult { @@ -502,11 +503,14 @@ async fn test_trigger_reset_root_dataset_flow() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("0", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]); let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) + .await; let complete_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) @@ -514,6 +518,7 @@ async fn test_trigger_reset_root_dataset_flow() { harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, ts::TaskOutcome::Success(ts::TaskResult::ResetDatasetResult( ts::TaskResetDatasetResult { @@ -822,11 +827,14 @@ async fn test_trigger_execute_transform_derived_dataset() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("0", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]); let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) 
+ .await; let complete_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) @@ -834,6 +842,7 @@ async fn test_trigger_execute_transform_derived_dataset() { harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, ts::TaskOutcome::Success(ts::TaskResult::UpdateDatasetResult( ts::TaskUpdateDatasetResult { @@ -1042,6 +1051,7 @@ async fn test_trigger_compaction_root_dataset() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("0", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]); let response = schema .execute( @@ -1116,7 +1126,9 @@ async fn test_trigger_compaction_root_dataset() { let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) + .await; let response = schema .execute( @@ -1193,6 +1205,7 @@ async fn test_trigger_compaction_root_dataset() { harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, ts::TaskOutcome::Success(ts::TaskResult::CompactionDatasetResult( ts::TaskCompactionDatasetResult { @@ -2006,9 +2019,12 @@ async fn test_cancel_ingest_root_dataset() { assert!(response.is_ok(), "{response:?}"); let response_json = response.data.into_json().unwrap(); let flow_id = FlowRunsHarness::extract_flow_id_from_trigger_response(&response_json); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]); let task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(task_id, Utc::now()).await; + harness + .mimic_task_running(task_id, flow_task_metadata, Utc::now()) + .await; let mutation_code = FlowRunsHarness::cancel_scheduled_tasks_mutation(&create_result.dataset_handle.id, flow_id); @@ -2078,9 +2094,12 @@ async fn test_cancel_running_transform_derived_dataset() { assert!(response.is_ok(), "{response:?}"); let response_json = response.data.into_json().unwrap(); let flow_id = FlowRunsHarness::extract_flow_id_from_trigger_response(&response_json); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]); let task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(task_id, Utc::now()).await; + harness + .mimic_task_running(task_id, flow_task_metadata, Utc::now()) + .await; let mutation_code = FlowRunsHarness::cancel_scheduled_tasks_mutation( &create_derived_result.dataset_handle.id, @@ -2149,9 +2168,12 @@ async fn test_cancel_hard_compaction_root_dataset() { assert!(response.is_ok(), "{response:?}"); let response_json = response.data.into_json().unwrap(); let flow_id = FlowRunsHarness::extract_flow_id_from_trigger_response(&response_json); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]); let task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(task_id, Utc::now()).await; + harness + .mimic_task_running(task_id, flow_task_metadata, Utc::now()) + .await; let mutation_code = FlowRunsHarness::cancel_scheduled_tasks_mutation(&create_result.dataset_handle.id, flow_id); @@ -2395,9 +2417,12 @@ async fn test_cancel_already_aborted_flow() { let flow_id = res_json["datasets"]["byId"]["flows"]["runs"]["triggerFlow"]["flow"]["flowId"] .as_str() .unwrap(); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, 
flow_id)]); let task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(task_id, Utc::now()).await; + harness + .mimic_task_running(task_id, flow_task_metadata, Utc::now()) + .await; let mutation_code = FlowRunsHarness::cancel_scheduled_tasks_mutation(&create_result.dataset_handle.id, flow_id); @@ -2475,12 +2500,16 @@ async fn test_cancel_already_succeeded_flow() { assert!(response.is_ok(), "{response:?}"); let response_json = response.data.into_json().unwrap(); let flow_id = FlowRunsHarness::extract_flow_id_from_trigger_response(&response_json); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]); let flow_task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(flow_task_id, Utc::now()).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), Utc::now()) + .await; harness .mimic_task_completed( flow_task_id, + flow_task_metadata, Utc::now(), ts::TaskOutcome::Success(ts::TaskResult::Empty), ) @@ -2552,6 +2581,8 @@ async fn test_history_of_completed_flow() { assert!(response.is_ok(), "{response:?}"); let response_json = response.data.into_json().unwrap(); let flow_id = FlowRunsHarness::extract_flow_id_from_trigger_response(&response_json); + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]); + harness .mimic_flow_secondary_trigger( flow_id, @@ -2562,10 +2593,13 @@ async fn test_history_of_completed_flow() { .await; let flow_task_id = harness.mimic_flow_scheduled(flow_id, Utc::now()).await; - harness.mimic_task_running(flow_task_id, Utc::now()).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), Utc::now()) + .await; harness .mimic_task_completed( flow_task_id, + flow_task_metadata, Utc::now(), ts::TaskOutcome::Success(ts::TaskResult::Empty), ) @@ -2716,10 +2750,14 @@ async fn test_execute_transfrom_flow_error_after_compaction() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("0", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]); + let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) + .await; let complete_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); @@ -2728,6 +2766,7 @@ async fn test_execute_transfrom_flow_error_after_compaction() { harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, ts::TaskOutcome::Success(ts::TaskResult::CompactionDatasetResult( ts::TaskCompactionDatasetResult { @@ -2864,16 +2903,21 @@ async fn test_execute_transfrom_flow_error_after_compaction() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); let flow_task_id = harness.mimic_flow_scheduled("1", schedule_time).await; + let flow_task_metadata = TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]); + let running_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); - harness.mimic_task_running(flow_task_id, running_time).await; + harness + .mimic_task_running(flow_task_id, flow_task_metadata.clone(), running_time) + .await; let complete_time = Utc::now() .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); harness .mimic_task_completed( flow_task_id, + flow_task_metadata, complete_time, 
ts::TaskOutcome::Failed(ts::TaskError::UpdateDatasetError( ts::UpdateDatasetTaskError::RootDatasetCompacted(ts::RootDatasetCompactedError { @@ -3181,16 +3225,14 @@ impl FlowRunsHarness { .add::() .add_value(dependency_graph_mock) .bind::() - .add::() .add::() - .add::() .add::() - .add_value(FlowServiceRunConfig::new( + .add_value(FlowExecutorConfig::new( Duration::try_seconds(1).unwrap(), Duration::try_minutes(1).unwrap(), )) .add::() - .add::() + .add::() .add_value(transform_service_mock) .bind::() .add_value(polling_service_mock) @@ -3203,6 +3245,7 @@ impl FlowRunsHarness { .add::(); NoOpDatabasePlugin::init_database_components(&mut b); + kamu_flow_system_services::register_dependencies(&mut b); register_message_dispatcher::( &mut b, @@ -3293,12 +3336,12 @@ impl FlowRunsHarness { ) -> ts::TaskID { let flow_service_test_driver = self .catalog_authorized - .get_one::() + .get_one::() .unwrap(); let flow_id = FlowID::new(flow_id.parse::().unwrap()); flow_service_test_driver - .mimic_flow_scheduled(flow_id, schedule_time) + .mimic_flow_scheduled(&self.catalog_authorized, flow_id, schedule_time) .await .unwrap() } @@ -3321,16 +3364,15 @@ impl FlowRunsHarness { flow.save(flow_event_store.as_ref()).await.unwrap(); } - async fn mimic_task_running(&self, task_id: ts::TaskID, event_time: DateTime) { - let flow_service_test_driver = self - .catalog_authorized - .get_one::() - .unwrap(); - flow_service_test_driver.mimic_running_started(); - + async fn mimic_task_running( + &self, + task_id: ts::TaskID, + task_metadata: ts::TaskMetadata, + event_time: DateTime, + ) { let task_event_store = self .catalog_anonymous - .get_one::() + .get_one::() .unwrap(); let mut task = ts::Task::load(task_id, task_event_store.as_ref()) @@ -3343,7 +3385,7 @@ impl FlowRunsHarness { outbox .post_message( ts::MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, - ts::TaskProgressMessage::running(event_time, task_id), + ts::TaskProgressMessage::running(event_time, task_id, task_metadata), ) .await .unwrap(); @@ -3352,18 +3394,13 @@ impl FlowRunsHarness { async fn mimic_task_completed( &self, task_id: ts::TaskID, + task_metadata: ts::TaskMetadata, event_time: DateTime, task_outcome: ts::TaskOutcome, ) { - let flow_service_test_driver = self - .catalog_authorized - .get_one::() - .unwrap(); - flow_service_test_driver.mimic_running_started(); - let task_event_store = self .catalog_anonymous - .get_one::() + .get_one::() .unwrap(); let mut task = ts::Task::load(task_id, task_event_store.as_ref()) @@ -3376,7 +3413,7 @@ impl FlowRunsHarness { outbox .post_message( ts::MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, - ts::TaskProgressMessage::finished(event_time, task_id, task_outcome), + ts::TaskProgressMessage::finished(event_time, task_id, task_metadata, task_outcome), ) .await .unwrap(); diff --git a/src/adapter/graphql/tests/tests/test_tasks.rs b/src/adapter/graphql/tests/tests/test_tasks.rs deleted file mode 100644 index 69acea7e8..000000000 --- a/src/adapter/graphql/tests/tests/test_tasks.rs +++ /dev/null @@ -1,296 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
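////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the diff: with the explicit in-memory task queue gone, the
// task->flow association travels as task metadata and is echoed back in every progress message,
// which is what the reworked FlowRunsHarness above mimics. The crate path for METADATA_TASK_FLOW_ID
// is an assumption; the constructors mirror the calls visible in the tests.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use chrono::{DateTime, Utc};
use kamu_flow_system::METADATA_TASK_FLOW_ID; // assumed re-export path for the "kamu.flow.id" key
use kamu_task_system as ts;

fn flow_task_progress_messages(
    task_id: ts::TaskID,
    event_time: DateTime<Utc>,
    flow_id: &str,
    outcome: ts::TaskOutcome,
) -> (ts::TaskProgressMessage, ts::TaskProgressMessage) {
    // Tag the task with the flow it was scheduled for
    let task_metadata = ts::TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, flow_id)]);

    // Both messages would be posted via the outbox under MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR,
    // exactly as mimic_task_running / mimic_task_completed do above
    (
        ts::TaskProgressMessage::running(event_time, task_id, task_metadata.clone()),
        ts::TaskProgressMessage::finished(event_time, task_id, task_metadata, outcome),
    )
}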
- -use async_graphql::*; -use chrono::Utc; -use dill::*; -use kamu_task_system::*; -use opendatafabric::DatasetID; - -use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -mockall::mock! { - TaskScheduler {} - #[async_trait::async_trait] - impl TaskScheduler for TaskScheduler { - async fn get_task(&self, task_id: TaskID) -> Result; - async fn list_tasks_by_dataset<'a>(&'a self, dataset_id: &DatasetID, pagination: TaskPaginationOpts) -> Result, ListTasksByDatasetError>; - async fn create_task(&self, plan: LogicalPlan) -> Result; - async fn cancel_task(&self, task_id: TaskID) -> Result; - async fn take(&self) -> Result; - async fn try_take(&self) -> Result, TakeTaskError>; - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_task_get_non_existing() { - let mut task_sched_mock = MockTaskScheduler::new(); - task_sched_mock.expect_get_task().return_once(|_| { - Err(GetTaskError::NotFound(TaskNotFoundError { - task_id: TaskID::new(1), - })) - }); - - let cat = dill::CatalogBuilder::new() - .add_value(task_sched_mock) - .bind::() - .build(); - - let schema = kamu_adapter_graphql::schema_quiet(); - let res = schema - .execute( - async_graphql::Request::new( - r#"{ - tasks { - getTask (taskId: "123") { - taskId - } - } - }"#, - ) - .data(cat), - ) - .await; - assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ - "tasks": { - "getTask": null, - } - }) - ); -} - -#[test_log::test(tokio::test)] -async fn test_task_get_existing() { - let returned_task = TaskState { - task_id: TaskID::new(123), - status: TaskStatus::Finished(TaskOutcome::Success(TaskResult::Empty)), - cancellation_requested: false, - logical_plan: LogicalPlan::UpdateDataset(UpdateDataset { - dataset_id: DatasetID::new_seeded_ed25519(b"foo"), - fetch_uncacheable: false, - }), - created_at: Utc::now(), - ran_at: None, - cancellation_requested_at: None, - finished_at: None, - }; - let expected_task = returned_task.clone(); - - let mut task_sched_mock = MockTaskScheduler::new(); - task_sched_mock - .expect_get_task() - .with(mockall::predicate::eq(expected_task.task_id)) - .return_once(move |_| Ok(returned_task)); - - let cat = dill::CatalogBuilder::new() - .add_value(task_sched_mock) - .bind::() - .build(); - - let schema = kamu_adapter_graphql::schema_quiet(); - let res = schema - .execute( - async_graphql::Request::new(format!( - r#"{{ - tasks {{ - getTask (taskId: "{}") {{ - taskId - status - cancellationRequested - outcome - }} - }} - }}"#, - expected_task.task_id, - )) - .data(cat), - ) - .await; - assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ - "tasks": { - "getTask": { - "taskId": expected_task.task_id.to_string(), - "cancellationRequested": false, - "status": "FINISHED", - "outcome": "SUCCESS", - }, - } - }) - ); -} - -#[test_log::test(tokio::test)] -async fn test_task_list_by_dataset() { - let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); - - let returned_task = TaskState { - task_id: TaskID::new(123), - status: TaskStatus::Queued, - cancellation_requested: false, - logical_plan: LogicalPlan::UpdateDataset(UpdateDataset { - dataset_id: dataset_id.clone(), - fetch_uncacheable: false, - }), - created_at: Utc::now(), - ran_at: None, - cancellation_requested_at: None, - finished_at: None, - }; - let expected_task = 
returned_task.clone(); - - let mut task_sched_mock = MockTaskScheduler::new(); - task_sched_mock - .expect_list_tasks_by_dataset() - .return_once(move |_, _| { - Ok(TaskStateListing { - stream: Box::pin(futures::stream::once(async { Ok(returned_task) })), - total_count: 1, - }) - }); - - let cat = dill::CatalogBuilder::new() - .add_value(task_sched_mock) - .bind::() - .build(); - - let schema = kamu_adapter_graphql::schema_quiet(); - let res = schema - .execute( - async_graphql::Request::new(format!( - r#"{{ - tasks {{ - listTasksByDataset (datasetId: "{dataset_id}") {{ - nodes {{ - taskId - status - outcome - }} - pageInfo {{ - hasPreviousPage - hasNextPage - currentPage - totalPages - }} - }} - }} - }}"#, - )) - .data(cat), - ) - .await; - assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ - "tasks": { - "listTasksByDataset": { - "nodes": [{ - "taskId": expected_task.task_id.to_string(), - "status": "QUEUED", - "outcome": null, - }], - "pageInfo": { - "hasPreviousPage": false, - "hasNextPage": false, - "currentPage": 0, - "totalPages": 1, - }, - }, - }, - }) - ); -} - -#[test_log::test(tokio::test)] -async fn test_task_create_probe() { - let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); - - let expected_logical_plan = LogicalPlan::Probe(Probe { - dataset_id: Some(dataset_id.clone()), - busy_time: Some(std::time::Duration::from_millis(500)), - end_with_outcome: Some(TaskOutcome::Failed(TaskError::Empty)), - }); - let returned_task = TaskState { - task_id: TaskID::new(123), - status: TaskStatus::Queued, - cancellation_requested: false, - logical_plan: expected_logical_plan.clone(), - created_at: Utc::now(), - ran_at: None, - cancellation_requested_at: None, - finished_at: None, - }; - let expected_task = returned_task.clone(); - - let mut task_sched_mock = MockTaskScheduler::new(); - task_sched_mock - .expect_create_task() - .withf(move |logical_plan| *logical_plan == expected_logical_plan) - .return_once(move |_| Ok(returned_task)); - - let base_cat = create_catalog(task_sched_mock); - let (cat_anonymous, cat_authorized) = authentication_catalogs(&base_cat).await; - - let request_code = format!( - r#"mutation {{ - tasks {{ - createProbeTask (datasetId: "{dataset_id}", busyTimeMs: 500, endWithOutcome: FAILED) {{ - taskId - }} - }} - }}"# - ); - - let schema = kamu_adapter_graphql::schema_quiet(); - - let res = schema - .execute(async_graphql::Request::new(request_code.clone()).data(cat_anonymous)) - .await; - expect_anonymous_access_error(res); - - let res = schema - .execute(async_graphql::Request::new(request_code).data(cat_authorized)) - .await; - assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ - "tasks": { - "createProbeTask": { - "taskId": expected_task.task_id.to_string(), - }, - }, - }) - ); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -fn create_catalog(task_sched_mock: MockTaskScheduler) -> Catalog { - let mut b = CatalogBuilder::new(); - - b.add_value(task_sched_mock) - .bind::(); - - database_common::NoOpDatabasePlugin::init_database_components(&mut b); - - b.build() -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/app/cli/Cargo.toml b/src/app/cli/Cargo.toml index e052ead82..6456653dd 100644 --- a/src/app/cli/Cargo.toml +++ b/src/app/cli/Cargo.toml @@ -67,9 +67,8 @@ kamu-flow-system-sqlite = { workspace = true } kamu-task-system-services = { workspace = 
true } kamu-task-system-inmem = { workspace = true } -# TODO: Activate after preparing services for transactional work -# kamu-task-system-postgres = { workspace = true } -# kamu-task-system-sqlite = { workspace = true } +kamu-task-system-postgres = { workspace = true } +kamu-task-system-sqlite = { workspace = true } kamu-accounts = { workspace = true } kamu-accounts-services = { workspace = true } diff --git a/src/app/cli/src/app.rs b/src/app/cli/src/app.rs index e6ac4a97b..7e16118af 100644 --- a/src/app/cli/src/app.rs +++ b/src/app/cli/src/app.rs @@ -340,10 +340,6 @@ pub fn configure_base_catalog( b.add::(); - b.add::(); - - b.add::(); - b.add::(); b.add::(); @@ -356,12 +352,13 @@ pub fn configure_base_catalog( b.add::(); b.add::(); - b.add::(); - b.add::(); - b.add_value(kamu_flow_system_inmem::domain::FlowServiceRunConfig::new( + kamu_task_system_services::register_dependencies(&mut b); + + b.add_value(kamu_flow_system_inmem::domain::FlowExecutorConfig::new( chrono::Duration::try_seconds(1).unwrap(), chrono::Duration::try_minutes(1).unwrap(), )); + kamu_flow_system_services::register_dependencies(&mut b); b.add::(); diff --git a/src/app/cli/src/commands/system_api_server_run_command.rs b/src/app/cli/src/commands/system_api_server_run_command.rs index 6b303803a..40b49aa2d 100644 --- a/src/app/cli/src/commands/system_api_server_run_command.rs +++ b/src/app/cli/src/commands/system_api_server_run_command.rs @@ -166,6 +166,7 @@ impl Command for APIServerRunCommand { } } + api_server.pre_run().await.map_err(CLIError::critical)?; api_server.run().await.map_err(CLIError::critical)?; Ok(()) diff --git a/src/app/cli/src/database.rs b/src/app/cli/src/database.rs index b55e60e34..e5836ace1 100644 --- a/src/app/cli/src/database.rs +++ b/src/app/cli/src/database.rs @@ -23,13 +23,6 @@ pub fn configure_database_components( raw_db_config: &DatabaseConfig, db_connection_settings: DatabaseConnectionSettings, ) { - // TODO: Remove after adding implementation of FlowEventStore for databases - b.add::(); - - // TODO: Delete after preparing services for transactional work and replace with - // permanent storage options - b.add::(); - match db_connection_settings.provider { DatabaseProvider::Postgres => { PostgresPlugin::init_database_components(b); @@ -40,6 +33,9 @@ pub fn configure_database_components( b.add::(); b.add::(); + b.add::(); + + b.add::(); b.add::(); b.add::(); @@ -50,20 +46,23 @@ pub fn configure_database_components( DatabaseProvider::MySql | DatabaseProvider::MariaDB => { MySqlPlugin::init_database_components(b); + // TODO: many components are not implemented for MySQL + // and are substituted with in-memory equivalents + b.add::(); b.add::(); b.add::(); b.add::(); + b.add::(); + + b.add::(); b.add::(); b.add::(); - // TODO: Private Datasets: implement database-related version b.add::(); - - // TODO: Task & Flow System MySQL versions } DatabaseProvider::Sqlite => { SqlitePlugin::init_database_components(b); @@ -73,7 +72,10 @@ pub fn configure_database_components( b.add::(); - b.add::(); + b.add::(); + b.add::(); + + b.add::(); b.add::(); b.add::(); @@ -97,7 +99,7 @@ pub fn configure_in_memory_components(b: &mut CatalogBuilder) { b.add::(); b.add::(); b.add::(); - b.add::(); + b.add::(); b.add::(); b.add::(); diff --git a/src/app/cli/src/explore/api_server.rs b/src/app/cli/src/explore/api_server.rs index 01cdfd7a6..85fcea651 100644 --- a/src/app/cli/src/explore/api_server.rs +++ b/src/app/cli/src/explore/api_server.rs @@ -22,7 +22,7 @@ use indoc::indoc; use internal_error::*; use 
kamu::domain::{Protocols, ServerUrlConfig}; use kamu_adapter_http::e2e::e2e_router; -use kamu_flow_system_inmem::domain::FlowService; +use kamu_flow_system_inmem::domain::FlowExecutor; use kamu_task_system_inmem::domain::TaskExecutor; use messaging_outbox::OutboxTransactionalProcessor; use time_source::SystemTimeSource; @@ -37,7 +37,7 @@ pub struct APIServer { axum::routing::IntoMakeService, >, task_executor: Arc, - flow_service: Arc, + flow_executor: Arc, outbox_processor: Arc, time_source: Arc, maybe_shutdown_notify: Option>, @@ -57,7 +57,7 @@ impl APIServer { // behalf of the system, as they are automatically scheduled let task_executor = cli_catalog.get_one().unwrap(); - let flow_service = cli_catalog.get_one().unwrap(); + let flow_executor = cli_catalog.get_one().unwrap(); let outbox_processor = cli_catalog.get_one().unwrap(); @@ -172,7 +172,7 @@ impl APIServer { Self { server, task_executor, - flow_service, + flow_executor, outbox_processor, time_source, maybe_shutdown_notify, @@ -183,6 +183,13 @@ impl APIServer { self.server.local_addr() } + pub async fn pre_run(&self) -> Result<(), InternalError> { + self.task_executor.pre_run().await?; + self.flow_executor.pre_run(self.time_source.now()).await?; + self.outbox_processor.pre_run().await?; + Ok(()) + } + pub async fn run(self) -> Result<(), InternalError> { let server_run_fut: Pin>> = if let Some(shutdown_notify) = self.maybe_shutdown_notify { @@ -201,7 +208,7 @@ impl APIServer { res = server_run_fut => { res.int_err() }, res = self.outbox_processor.run() => { res.int_err() }, res = self.task_executor.run() => { res.int_err() }, - res = self.flow_service.run(self.time_source.now()) => { res.int_err() } + res = self.flow_executor.run() => { res.int_err() } } } } diff --git a/src/domain/accounts/domain/src/repos/access_token_repository.rs b/src/domain/accounts/domain/src/repos/access_token_repository.rs index 2c3365dec..935615205 100644 --- a/src/domain/accounts/domain/src/repos/access_token_repository.rs +++ b/src/domain/accounts/domain/src/repos/access_token_repository.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. 
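////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the diff: embedders of APIServer are now expected to run the
// explicit initialization phase before entering the main loops, mirroring the CLI command change
// above. The import path of APIServer is an assumption.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use internal_error::InternalError;
use kamu_cli::explore::APIServer; // assumed public path

async fn serve(api_server: APIServer) -> Result<(), InternalError> {
    // pre_run() lets the task executor, flow executor and outbox processor recover unfinished
    // work before any loop starts serving, which is what closes the startup races
    api_server.pre_run().await?;
    api_server.run().await
}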
use chrono::{DateTime, Utc}; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use internal_error::InternalError; use opendatafabric::AccountID; use thiserror::Error; @@ -30,7 +30,7 @@ pub trait AccessTokenRepository: Send + Sync { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetAccessTokenError>; async fn get_access_tokens_count_by_account_id( diff --git a/src/domain/accounts/domain/src/services/access_token_service.rs b/src/domain/accounts/domain/src/services/access_token_service.rs index 635825af5..b2ea3d272 100644 --- a/src/domain/accounts/domain/src/services/access_token_service.rs +++ b/src/domain/accounts/domain/src/services/access_token_service.rs @@ -9,7 +9,7 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use opendatafabric::AccountID; use uuid::Uuid; @@ -42,7 +42,7 @@ pub trait AccessTokenService: Sync + Send { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result; async fn revoke_access_token(&self, token_id: &Uuid) -> Result<(), RevokeTokenError>; diff --git a/src/domain/accounts/services/src/access_token_service_impl.rs b/src/domain/accounts/services/src/access_token_service_impl.rs index 84fc09832..d1f149dca 100644 --- a/src/domain/accounts/services/src/access_token_service_impl.rs +++ b/src/domain/accounts/services/src/access_token_service_impl.rs @@ -9,7 +9,7 @@ use std::sync::Arc; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::*; use kamu_accounts::{ AccessToken, @@ -95,7 +95,7 @@ impl AccessTokenService for AccessTokenServiceImpl { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result { let total_count = self .access_token_repository diff --git a/src/domain/datasets/domain/src/repos/dataset_env_var_repository.rs b/src/domain/datasets/domain/src/repos/dataset_env_var_repository.rs index d55880399..e2274b2c3 100644 --- a/src/domain/datasets/domain/src/repos/dataset_env_var_repository.rs +++ b/src/domain/datasets/domain/src/repos/dataset_env_var_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use internal_error::InternalError; use opendatafabric::DatasetID; use thiserror::Error; @@ -32,7 +32,7 @@ pub trait DatasetEnvVarRepository: Send + Sync { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetDatasetEnvVarError>; async fn get_dataset_env_var_by_id( diff --git a/src/domain/datasets/domain/src/services/dataset_env_var_service.rs b/src/domain/datasets/domain/src/services/dataset_env_var_service.rs index 502fbda90..f2ee2e842 100644 --- a/src/domain/datasets/domain/src/services/dataset_env_var_service.rs +++ b/src/domain/datasets/domain/src/services/dataset_env_var_service.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
-use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use internal_error::InternalError; use opendatafabric::DatasetID; use uuid::Uuid; @@ -45,7 +45,7 @@ pub trait DatasetEnvVarService: Sync + Send { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: Option, + pagination: Option, ) -> Result; async fn delete_dataset_env_var( diff --git a/src/domain/datasets/services/src/dataset_env_var_service_impl.rs b/src/domain/datasets/services/src/dataset_env_var_service_impl.rs index 69067d3df..5e4d7b457 100644 --- a/src/domain/datasets/services/src/dataset_env_var_service_impl.rs +++ b/src/domain/datasets/services/src/dataset_env_var_service_impl.rs @@ -9,7 +9,7 @@ use std::sync::Arc; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::*; use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use kamu_datasets::{ @@ -107,7 +107,7 @@ impl DatasetEnvVarService for DatasetEnvVarServiceImpl { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: Option, + pagination: Option, ) -> Result { let total_count = self .dataset_env_var_repository @@ -119,11 +119,8 @@ impl DatasetEnvVarService for DatasetEnvVarServiceImpl { list: vec![], }); } - let database_pagination = pagination.unwrap_or(DatabasePaginationOpts { - // We assume that it is impossible to reach dataset env vars count bigger - // than max i64 value - #[allow(clippy::cast_possible_wrap)] - limit: total_count as i64, + let database_pagination = pagination.unwrap_or(PaginationOpts { + limit: total_count, offset: 0, }); diff --git a/src/domain/datasets/services/src/dataset_env_var_service_null.rs b/src/domain/datasets/services/src/dataset_env_var_service_null.rs index 7b65f5a20..c8e4c922b 100644 --- a/src/domain/datasets/services/src/dataset_env_var_service_null.rs +++ b/src/domain/datasets/services/src/dataset_env_var_service_null.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
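////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the diff: the shared database_common::PaginationOpts replaces
// the repository-specific DatabasePaginationOpts and, as the call sites above suggest, uses plain
// usize fields, so the clippy-silenced i64 casts disappear.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use database_common::PaginationOpts;

fn first_page(page_size: usize) -> PaginationOpts {
    PaginationOpts {
        limit: page_size,
        offset: 0,
    }
}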
-use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::*; use internal_error::InternalError; use kamu_datasets::{ @@ -67,7 +67,7 @@ impl DatasetEnvVarService for DatasetEnvVarServiceNull { async fn get_all_dataset_env_vars_by_dataset_id( &self, _dataset_id: &DatasetID, - _pagination: Option, + _pagination: Option, ) -> Result { Ok(DatasetEnvVarListing { list: vec![], diff --git a/src/domain/flow-system/domain/Cargo.toml b/src/domain/flow-system/domain/Cargo.toml index 9abd5c89c..873d5f9ff 100644 --- a/src/domain/flow-system/domain/Cargo.toml +++ b/src/domain/flow-system/domain/Cargo.toml @@ -22,6 +22,7 @@ doctest = false [dependencies] +database-common = { workspace = true } enum-variants = { workspace = true } event-sourcing = { workspace = true } internal-error = { workspace = true } @@ -34,6 +35,7 @@ kamu-task-system = { workspace = true } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } cron = { version = "0.12", default-features = false } +dill = { version = "0.9" } lazy_static = { version = "1" } sqlx = { version = "0.8", default-features = false, features = ["macros"] } thiserror = { version = "1", default-features = false } diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_event.rs b/src/domain/flow-system/domain/src/entities/flow/flow_event.rs index eebcee31b..022e91c64 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_event.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_event.rs @@ -10,12 +10,13 @@ use chrono::{DateTime, Utc}; use enum_variants::*; use kamu_task_system::{TaskID, TaskOutcome}; +use serde::{Deserialize, Serialize}; use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowEvent { /// Flow initiated Initiated(FlowEventInitiated), @@ -33,9 +34,23 @@ pub enum FlowEvent { Aborted(FlowEventAborted), } +impl FlowEvent { + pub fn typename(&self) -> &'static str { + match self { + FlowEvent::Initiated(_) => "FlowEventInitiated", + FlowEvent::StartConditionUpdated(_) => "FlowEventStartConditionUpdated", + FlowEvent::TriggerAdded(_) => "FlowEventTriggerAdded", + FlowEvent::TaskScheduled(_) => "FlowEventTaskScheduled", + FlowEvent::TaskRunning(_) => "FlowEventTaskRunning", + FlowEvent::TaskFinished(_) => "FlowEventTaskFinished", + FlowEvent::Aborted(_) => "FlowEventAborted", + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventInitiated { pub event_time: DateTime, pub flow_id: FlowID, @@ -46,7 +61,7 @@ pub struct FlowEventInitiated { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventStartConditionUpdated { pub event_time: DateTime, pub flow_id: FlowID, @@ -56,7 +71,7 @@ pub struct FlowEventStartConditionUpdated { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct 
FlowEventTriggerAdded { pub event_time: DateTime, pub flow_id: FlowID, @@ -65,7 +80,7 @@ pub struct FlowEventTriggerAdded { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventTaskScheduled { pub event_time: DateTime, pub flow_id: FlowID, @@ -74,7 +89,7 @@ pub struct FlowEventTaskScheduled { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventTaskRunning { pub event_time: DateTime, pub flow_id: FlowID, @@ -83,7 +98,7 @@ pub struct FlowEventTaskRunning { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventTaskFinished { pub event_time: DateTime, pub flow_id: FlowID, @@ -93,7 +108,7 @@ pub struct FlowEventTaskFinished { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowEventAborted { pub event_time: DateTime, pub flow_id: FlowID, diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_id.rs b/src/domain/flow-system/domain/src/entities/flow/flow_id.rs index c88e3a246..62aaf11ec 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_id.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_id.rs @@ -7,18 +7,35 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
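////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the diff: the Serialize/Deserialize derives plus the new
// typename() helper are what allow flow events to be persisted by the database-backed event
// stores. The JSON encoding and the crate path of FlowEvent are assumptions made for illustration.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use kamu_flow_system::FlowEvent; // assumed path to the flow-system domain crate

fn to_stored_row(event: &FlowEvent) -> Result<(&'static str, String), serde_json::Error> {
    // typename() yields a stable discriminator such as "FlowEventTaskFinished"
    Ok((event.typename(), serde_json::to_string(event)?))
}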
+use std::num::TryFromIntError; + use internal_error::InternalError; +use serde::{Deserialize, Serialize}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Uniquely identifies a flow -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] pub struct FlowID(u64); impl FlowID { pub fn new(id: u64) -> Self { Self(id) } + + pub fn from(id_as_str: &str) -> Result { + let id = id_as_str.parse()?; + Ok(Self(id)) + } +} + +impl TryFrom for FlowID { + type Error = TryFromIntError; + + fn try_from(val: i64) -> Result { + let id: u64 = u64::try_from(val)?; + Ok(Self::new(id)) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -35,6 +52,14 @@ impl From for u64 { } } +impl TryFrom for i64 { + type Error = TryFromIntError; + + fn try_from(val: FlowID) -> Result { + i64::try_from(val.0) + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub type FlowIDStream<'a> = diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs b/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs index f8ac4b030..599cb6647 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs @@ -10,6 +10,7 @@ use kamu_core::{CompactionResult, PullResult, PullResultUpToDate}; use kamu_task_system::{self as ts, ResetDatasetTaskError, UpdateDatasetTaskError}; use opendatafabric::{DatasetID, Multihash}; +use serde::{Deserialize, Serialize}; use ts::TaskError; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -35,7 +36,7 @@ impl FlowOutcome { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowResult { Empty, DatasetUpdate(FlowResultDatasetUpdate), @@ -84,31 +85,31 @@ impl From<&TaskError> for FlowError { } } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowResultDatasetUpdate { Changed(FlowResultDatasetUpdateChanged), UpToDate(FlowResultDatasetUpdateUpToDate), } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowResultDatasetUpdateChanged { pub old_head: Option, pub new_head: Multihash, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowResultDatasetUpdateUpToDate { pub uncacheable: bool, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowResultDatasetCompact { pub new_head: Multihash, pub old_num_blocks: usize, pub new_num_blocks: usize, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowResultDatasetReset { pub new_head: Multihash, } diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_start_condition.rs b/src/domain/flow-system/domain/src/entities/flow/flow_start_condition.rs index ec88aa2a4..d5fa72ac5 100644 --- 
a/src/domain/flow-system/domain/src/entities/flow/flow_start_condition.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_start_condition.rs @@ -9,12 +9,13 @@ use chrono::{DateTime, Duration, Utc}; use kamu_task_system::TaskID; +use serde::{Deserialize, Serialize}; use crate::TransformRule; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowStartCondition { Schedule(FlowStartConditionSchedule), Throttling(FlowStartConditionThrottling), @@ -22,17 +23,29 @@ pub enum FlowStartCondition { Executor(FlowStartConditionExecutor), } +impl FlowStartCondition { + pub fn wake_up_at(&self) -> Option> { + match self { + Self::Schedule(s) => Some(s.wake_up_at), + Self::Throttling(t) => Some(t.wake_up_at), + Self::Batching(_) | Self::Executor(_) => None, + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowStartConditionSchedule { pub wake_up_at: DateTime, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[serde_with::serde_as] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowStartConditionThrottling { + #[serde_as(as = "serde_with::DurationMilliSeconds")] pub interval: Duration, pub wake_up_at: DateTime, pub shifted_from: DateTime, @@ -40,7 +53,7 @@ pub struct FlowStartConditionThrottling { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowStartConditionBatching { pub active_transform_rule: TransformRule, pub batching_deadline: DateTime, @@ -48,7 +61,7 @@ pub struct FlowStartConditionBatching { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowStartConditionExecutor { pub task_id: TaskID, } diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_state.rs b/src/domain/flow-system/domain/src/entities/flow/flow_state.rs index 637359d68..126e9a71e 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_state.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_state.rs @@ -63,6 +63,17 @@ impl FlowState { } } + // Extract wakeup time + pub fn wake_up_at(&self) -> Option> { + if self.status() == FlowStatus::Waiting { + if let Some(start_condition) = self.start_condition.as_ref() { + return start_condition.wake_up_at(); + } + } + + None + } + pub fn try_result_as_ref(&self) -> Option<&FlowResult> { self.outcome .as_ref() diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_status.rs b/src/domain/flow-system/domain/src/entities/flow/flow_status.rs index d4dd4ae74..5f1e4a0f4 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_status.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_status.rs @@ -9,7 +9,8 @@ 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, sqlx::Type)] +#[sqlx(type_name = "flow_status_type", rename_all = "snake_case")] pub enum FlowStatus { Waiting, Running, diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_trigger.rs b/src/domain/flow-system/domain/src/entities/flow/flow_trigger.rs index 8f396370e..a24d236b9 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_trigger.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_trigger.rs @@ -9,12 +9,13 @@ use chrono::{DateTime, Utc}; use opendatafabric::{AccountID, DatasetID}; +use serde::{Deserialize, Serialize}; use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowTrigger { Manual(FlowTriggerManual), AutoPolling(FlowTriggerAutoPolling), @@ -78,7 +79,7 @@ impl FlowTrigger { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowTriggerManual { pub trigger_time: DateTime, pub initiator_account_id: AccountID, @@ -92,14 +93,14 @@ pub type InitiatorIDStream<'a> = std::pin::Pin< //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowTriggerAutoPolling { pub trigger_time: DateTime, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowTriggerPush { // TODO: source (HTTP, MQTT, CMD, ...) 
pub trigger_time: DateTime, @@ -108,7 +109,7 @@ pub struct FlowTriggerPush { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct FlowTriggerInputDatasetFlow { pub trigger_time: DateTime, pub dataset_id: DatasetID, diff --git a/src/domain/flow-system/domain/src/entities/flow_configuration/flow_configuration_state.rs b/src/domain/flow-system/domain/src/entities/flow_configuration/flow_configuration_state.rs index ac12d6bd2..482ad447b 100644 --- a/src/domain/flow-system/domain/src/entities/flow_configuration/flow_configuration_state.rs +++ b/src/domain/flow-system/domain/src/entities/flow_configuration/flow_configuration_state.rs @@ -27,6 +27,48 @@ impl FlowConfigurationState { pub fn is_active(&self) -> bool { self.status.is_active() } + + pub fn try_get_schedule(self) -> Option { + match self.rule { + FlowConfigurationRule::Schedule(schedule) => Some(schedule), + FlowConfigurationRule::IngestRule(ingest) => Some(ingest.schedule_condition), + FlowConfigurationRule::CompactionRule(_) + | FlowConfigurationRule::ResetRule(_) + | FlowConfigurationRule::TransformRule(_) => None, + } + } + + pub fn try_get_ingest_rule(self) -> Option { + if let FlowConfigurationRule::IngestRule(ingest_rule) = self.rule { + Some(ingest_rule) + } else { + None + } + } + + pub fn try_get_transform_rule(self) -> Option { + if let FlowConfigurationRule::TransformRule(transform_rule) = self.rule { + Some(transform_rule) + } else { + None + } + } + + pub fn try_get_compaction_rule(self) -> Option { + if let FlowConfigurationRule::CompactionRule(compation_rule) = self.rule { + Some(compation_rule) + } else { + None + } + } + + pub fn try_get_reset_rule(self) -> Option { + if let FlowConfigurationRule::ResetRule(reset_rule) = self.rule { + Some(reset_rule) + } else { + None + } + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/entities/shared/flow_run_snapshot.rs b/src/domain/flow-system/domain/src/entities/shared/flow_run_snapshot.rs index 50e589dd7..51e8ac889 100644 --- a/src/domain/flow-system/domain/src/entities/shared/flow_run_snapshot.rs +++ b/src/domain/flow-system/domain/src/entities/shared/flow_run_snapshot.rs @@ -7,11 +7,13 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use serde::{Deserialize, Serialize}; + use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FlowConfigurationSnapshot { Transform(TransformRule), Compaction(CompactionRule), diff --git a/src/domain/flow-system/domain/src/entities/shared/flow_task_metadata.rs b/src/domain/flow-system/domain/src/entities/shared/flow_task_metadata.rs new file mode 100644 index 000000000..4d8cdc7d1 --- /dev/null +++ b/src/domain/flow-system/domain/src/entities/shared/flow_task_metadata.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub const METADATA_TASK_FLOW_ID: &str = "kamu.flow.id"; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/entities/shared/mod.rs b/src/domain/flow-system/domain/src/entities/shared/mod.rs index 232c4c932..f7272e3e6 100644 --- a/src/domain/flow-system/domain/src/entities/shared/mod.rs +++ b/src/domain/flow-system/domain/src/entities/shared/mod.rs @@ -10,6 +10,7 @@ mod compaction_rule; mod flow_key; mod flow_run_snapshot; +mod flow_task_metadata; mod flow_type; mod ingest_rule; mod reset_rule; @@ -19,6 +20,7 @@ mod transform_rule; pub use compaction_rule::*; pub use flow_key::*; pub use flow_run_snapshot::*; +pub use flow_task_metadata::*; pub use flow_type::*; pub use ingest_rule::*; pub use reset_rule::*; diff --git a/src/domain/flow-system/domain/src/executors/flow_executor.rs b/src/domain/flow-system/domain/src/executors/flow_executor.rs new file mode 100644 index 000000000..aa75007c6 --- /dev/null +++ b/src/domain/flow-system/domain/src/executors/flow_executor.rs @@ -0,0 +1,52 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use chrono::{DateTime, DurationRound, Utc}; +use internal_error::{InternalError, ResultIntoInternal}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait FlowExecutor: Sync + Send { + /// Runs initialization phase + async fn pre_run(&self, planned_start_time: DateTime) -> Result<(), InternalError>; + + /// Runs the update main loop + async fn run(&self) -> Result<(), InternalError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct FlowExecutorConfig { + /// Defines discretion for main scheduling loop: how often new data is + /// checked and processed + pub awaiting_step: chrono::Duration, + /// Defines minimal time between 2 runs of the same flow configuration + pub mandatory_throttling_period: chrono::Duration, +} + +impl FlowExecutorConfig { + pub fn new( + awaiting_step: chrono::Duration, + mandatory_throttling_period: chrono::Duration, + ) -> Self { + Self { + awaiting_step, + mandatory_throttling_period, + } + } + + pub fn round_time(&self, time: DateTime) -> Result, InternalError> { + let rounded_time = time.duration_round(self.awaiting_step).int_err()?; + Ok(rounded_time) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/executors/mod.rs b/src/domain/flow-system/domain/src/executors/mod.rs new file mode 100644 index 000000000..54e3893c3 --- /dev/null +++ b/src/domain/flow-system/domain/src/executors/mod.rs @@ -0,0 +1,12 @@ +// Copyright Kamu Data, Inc. and contributors. 
All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod flow_executor; + +pub use flow_executor::*; diff --git a/src/domain/flow-system/domain/src/flow_messages_types.rs b/src/domain/flow-system/domain/src/flow_messages_types.rs index c81f8aae6..0a6aabb5e 100644 --- a/src/domain/flow-system/domain/src/flow_messages_types.rs +++ b/src/domain/flow-system/domain/src/flow_messages_types.rs @@ -28,17 +28,17 @@ impl Message for FlowConfigurationUpdatedMessage {} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FlowServiceUpdatedMessage { +pub struct FlowExecutorUpdatedMessage { pub update_time: DateTime, - pub update_details: FlowServiceUpdateDetails, + pub update_details: FlowExecutorUpdateDetails, } -impl Message for FlowServiceUpdatedMessage {} +impl Message for FlowExecutorUpdatedMessage {} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum FlowServiceUpdateDetails { +pub enum FlowExecutorUpdateDetails { Loaded, ExecutedTimeslot, FlowRunning, diff --git a/src/domain/flow-system/domain/src/lib.rs b/src/domain/flow-system/domain/src/lib.rs index 4d87f54b4..6f52be704 100644 --- a/src/domain/flow-system/domain/src/lib.rs +++ b/src/domain/flow-system/domain/src/lib.rs @@ -15,15 +15,17 @@ pub use event_sourcing::*; mod flow_messages_types; -pub mod aggregates; -pub mod dataset_flow_key; -pub mod entities; -pub mod repos; -pub mod services; +mod aggregates; +mod dataset_flow_key; +mod entities; +mod executors; +mod repos; +mod services; pub use aggregates::*; pub use dataset_flow_key::*; pub use entities::*; +pub use executors::*; pub use flow_messages_types::*; pub use repos::*; pub use services::*; diff --git a/src/domain/flow-system/domain/src/repos/flow/flow_event_store.rs b/src/domain/flow-system/domain/src/repos/flow/flow_event_store.rs index 00273284c..31253f672 100644 --- a/src/domain/flow-system/domain/src/repos/flow/flow_event_store.rs +++ b/src/domain/flow-system/domain/src/repos/flow/flow_event_store.rs @@ -10,6 +10,7 @@ use std::collections::HashSet; use chrono::{DateTime, Utc}; +use database_common::PaginationOpts; use event_sourcing::EventStore; use opendatafabric::{AccountID, DatasetID}; @@ -20,7 +21,13 @@ use crate::*; #[async_trait::async_trait] pub trait FlowEventStore: EventStore { /// Generates new unique flow identifier - fn new_flow_id(&self) -> FlowID; + async fn new_flow_id(&self) -> Result; + + /// Attempts to access the pending (unfinished) flow ID for the given key + async fn try_get_pending_flow( + &self, + flow_key: &FlowKey, + ) -> Result, InternalError>; /// Returns last run statistics for the dataset flow of certain type async fn get_dataset_flow_run_stats( @@ -41,8 +48,8 @@ pub trait FlowEventStore: EventStore { fn get_all_flow_ids_by_dataset( &self, dataset_id: &DatasetID, - filters: DatasetFlowFilters, - pagination: FlowPaginationOpts, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, ) -> FlowIDStream; /// Returns IDs of the flow initiators associated with the specified @@ -63,9 +70,9 @@ pub trait FlowEventStore: EventStore { /// Applies 
filters/pagination, if specified fn get_all_flow_ids_by_datasets( &self, - dataset_id: HashSet, + dataset_ids: HashSet, filters: &DatasetFlowFilters, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> FlowIDStream; /// Returns IDs of the system flows in reverse chronological order based on @@ -73,8 +80,8 @@ pub trait FlowEventStore: EventStore { /// Applies filters/pagination, if specified fn get_all_system_flow_ids( &self, - filters: SystemFlowFilters, - pagination: FlowPaginationOpts, + filters: &SystemFlowFilters, + pagination: PaginationOpts, ) -> FlowIDStream; /// Returns number of system flows matching filters, if specified @@ -83,23 +90,20 @@ pub trait FlowEventStore: EventStore { filters: &SystemFlowFilters, ) -> Result; - /// Returns IDs of the flows of any type in reverse chronological order - /// based on creation time - /// TODO: not used yet, evaluate need in filters - fn get_all_flow_ids(&self, pagination: FlowPaginationOpts) -> FlowIDStream<'_>; + /// Returns IDs of the flows of any type matching the given filters in + /// reverse chronological order based on creation time + fn get_all_flow_ids( + &self, + filters: &AllFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream<'_>; - /// Returns number of all flows - async fn get_count_all_flows(&self) -> Result; + /// Returns number of all flows, matching filters + async fn get_count_all_flows(&self, filters: &AllFlowFilters) -> Result; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Copy, Clone)] -pub struct FlowPaginationOpts { - pub offset: usize, - pub limit: usize, -} - #[derive(Default, Debug, Clone)] pub struct DatasetFlowFilters { pub by_flow_type: Option, @@ -122,6 +126,12 @@ pub struct SystemFlowFilters { pub by_initiator: Option, } +#[derive(Default, Debug, Clone)] +pub struct AllFlowFilters { + pub by_flow_status: Option, + pub by_initiator: Option, +} + #[derive(Debug, Clone)] pub enum InitiatorFilter { System, @@ -130,7 +140,7 @@ pub enum InitiatorFilter { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Default, Debug, Clone, Copy)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub struct FlowRunStats { pub last_success_time: Option>, pub last_attempt_time: Option>, diff --git a/src/domain/flow-system/domain/src/repos/flow_configuration/flow_configuration_event_store.rs b/src/domain/flow-system/domain/src/repos/flow_configuration/flow_configuration_event_store.rs index 33860f712..096fc811e 100644 --- a/src/domain/flow-system/domain/src/repos/flow_configuration/flow_configuration_event_store.rs +++ b/src/domain/flow-system/domain/src/repos/flow_configuration/flow_configuration_event_store.rs @@ -26,7 +26,7 @@ pub type FailableDatasetIDStream<'a> = pub trait FlowConfigurationEventStore: EventStore { /// Returns all unique values of dataset IDs associated with update configs // TODO: re-consider performance impact - async fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_>; + fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_>; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/services/flow/flow_service.rs b/src/domain/flow-system/domain/src/services/flow/flow_query_service.rs similarity index 87% rename from src/domain/flow-system/domain/src/services/flow/flow_service.rs 
rename to src/domain/flow-system/domain/src/services/flow/flow_query_service.rs index c9638283a..163a9eb1e 100644 --- a/src/domain/flow-system/domain/src/services/flow/flow_service.rs +++ b/src/domain/flow-system/domain/src/services/flow/flow_query_service.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. use chrono::{DateTime, Utc}; +use database_common::PaginationOpts; use event_sourcing::LoadError; use internal_error::{ErrorIntoInternal, InternalError}; use opendatafabric::{AccountID, DatasetID}; @@ -19,7 +20,6 @@ use crate::{ FlowConfigurationSnapshot, FlowID, FlowKey, - FlowPaginationOpts, FlowState, SystemFlowFilters, }; @@ -27,19 +27,7 @@ use crate::{ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait FlowService: Sync + Send { - /// Runs the update main loop - async fn run(&self, planned_start_time: DateTime) -> Result<(), InternalError>; - - /// Triggers the specified flow manually, unless it's already waiting - async fn trigger_manual_flow( - &self, - trigger_time: DateTime, - flow_key: FlowKey, - initiator_account_id: AccountID, - flow_run_snapshots_maybe: Option, - ) -> Result; - +pub trait FlowQueryService: Sync + Send { /// Returns states of flows associated with a given dataset /// ordered by creation time from newest to oldest. /// Applies specified filters/pagination @@ -47,7 +35,7 @@ pub trait FlowService: Sync + Send { &self, dataset_id: &DatasetID, filters: DatasetFlowFilters, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> Result; /// Returns initiators of flows associated with a given dataset @@ -73,7 +61,7 @@ pub trait FlowService: Sync + Send { &self, account_id: &AccountID, filters: AccountFlowFilters, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> Result; /// Returns states of system flows associated with a given dataset @@ -82,19 +70,28 @@ pub trait FlowService: Sync + Send { async fn list_all_system_flows( &self, filters: SystemFlowFilters, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> Result; /// Returns state of all flows, whether they are system-level or /// dataset-bound, ordered by creation time from newest to oldest async fn list_all_flows( &self, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> Result; /// Returns current state of a given flow async fn get_flow(&self, flow_id: FlowID) -> Result; + /// Triggers the specified flow manually, unless it's already waiting + async fn trigger_manual_flow( + &self, + trigger_time: DateTime, + flow_key: FlowKey, + initiator_account_id: AccountID, + flow_run_snapshots_maybe: Option, + ) -> Result; + /// Attempts to cancel the tasks already scheduled for the given flow async fn cancel_scheduled_tasks( &self, @@ -132,12 +129,6 @@ pub type DatasetsStream<'a> = //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(thiserror::Error, Debug)] -pub enum RequestFlowError { - #[error(transparent)] - Internal(#[from] InternalError), -} - #[derive(thiserror::Error, Debug)] pub enum ListFlowsByDatasetError { #[error(transparent)] @@ -176,6 +167,12 @@ pub enum GetFlowError { Internal(#[from] InternalError), } +#[derive(thiserror::Error, Debug)] +pub enum RequestFlowError { + #[error(transparent)] + Internal(#[from] InternalError), +} + #[derive(thiserror::Error, Debug)] pub enum CancelScheduledTasksError { #[error(transparent)] @@ -217,26 +214,3 @@ impl 
From> for CancelScheduledTasksError { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug)] -pub struct FlowServiceRunConfig { - /// Defines discretion for main scheduling loop: how often new data is - /// checked and processed - pub awaiting_step: chrono::Duration, - /// Defines minimal time between 2 runs of the same flow configuration - pub mandatory_throttling_period: chrono::Duration, -} - -impl FlowServiceRunConfig { - pub fn new( - awaiting_step: chrono::Duration, - mandatory_throttling_period: chrono::Duration, - ) -> Self { - Self { - awaiting_step, - mandatory_throttling_period, - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs b/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs index e4308749d..71a96bec1 100644 --- a/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs +++ b/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. use chrono::{DateTime, Utc}; +use dill::Catalog; use internal_error::InternalError; use kamu_task_system::TaskID; @@ -16,13 +17,11 @@ use crate::FlowID; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait FlowServiceTestDriver: Sync + Send { - /// Pretends running started - fn mimic_running_started(&self); - +pub trait FlowExecutorTestDriver: Sync + Send { /// Pretends it is time to schedule the given flow that was in Queued state async fn mimic_flow_scheduled( &self, + target_catalog: &Catalog, flow_id: FlowID, schedule_time: DateTime, ) -> Result; diff --git a/src/domain/flow-system/domain/src/services/flow/flow_time_wheel_service.rs b/src/domain/flow-system/domain/src/services/flow/flow_time_wheel_service.rs new file mode 100644 index 000000000..1fcddd9dc --- /dev/null +++ b/src/domain/flow-system/domain/src/services/flow/flow_time_wheel_service.rs @@ -0,0 +1,44 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
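////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the diff: the FlowServiceRunConfig removed above lives on as
// FlowExecutorConfig (see executors/flow_executor.rs earlier in this diff), which additionally
// owns rounding of activation times to the scheduling step. DateTime<Utc> in round_time is an
// assumption recovered from its duration_round() body; the values are the test defaults used above.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

use chrono::{DateTime, Duration, Utc};
use internal_error::InternalError;
use kamu_flow_system_inmem::domain::FlowExecutorConfig;

fn next_activation_slot(now: DateTime<Utc>) -> Result<DateTime<Utc>, InternalError> {
    let config = FlowExecutorConfig::new(
        Duration::try_seconds(1).unwrap(),  // awaiting_step
        Duration::try_minutes(1).unwrap(),  // mandatory_throttling_period
    );
    config.round_time(now)
}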
+ +use chrono::{DateTime, Utc}; +use thiserror::Error; + +use crate::FlowID; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub trait FlowTimeWheelService: Send + Sync { + fn nearest_activation_moment(&self) -> Option>; + + fn take_nearest_planned_flows(&self) -> Vec; + + fn activate_at(&self, activation_time: DateTime, flow_id: FlowID); + + fn get_planned_flow_activation_time(&self, flow_id: FlowID) -> Option>; + + fn cancel_flow_activation(&self, flow_id: FlowID) + -> Result<(), TimeWheelCancelActivationError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum TimeWheelCancelActivationError { + #[error(transparent)] + FlowNotPlanned(TimeWheelFlowNotPlannedError), +} + +#[derive(Error, Debug)] +#[error("Flow '{flow_id}' not found planned in the time wheel")] +pub struct TimeWheelFlowNotPlannedError { + pub flow_id: FlowID, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/services/flow/mod.rs b/src/domain/flow-system/domain/src/services/flow/mod.rs index 71124e06b..bccb42dc2 100644 --- a/src/domain/flow-system/domain/src/services/flow/mod.rs +++ b/src/domain/flow-system/domain/src/services/flow/mod.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -mod flow_service; +mod flow_query_service; mod flow_service_test_driver; +mod flow_time_wheel_service; -pub use flow_service::*; +pub use flow_query_service::*; pub use flow_service_test_driver::*; +pub use flow_time_wheel_service::*; diff --git a/src/domain/flow-system/domain/src/services/flow_configuration/flow_configuration_service.rs b/src/domain/flow-system/domain/src/services/flow_configuration/flow_configuration_service.rs index 46af7e334..fcad75195 100644 --- a/src/domain/flow-system/domain/src/services/flow_configuration/flow_configuration_service.rs +++ b/src/domain/flow-system/domain/src/services/flow_configuration/flow_configuration_service.rs @@ -13,13 +13,7 @@ use internal_error::{ErrorIntoInternal, InternalError}; use opendatafabric::DatasetID; use tokio_stream::Stream; -use crate::{ - DatasetFlowType, - FlowConfigurationRule, - FlowConfigurationState, - FlowKey, - SystemFlowType, -}; +use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -101,6 +95,184 @@ pub trait FlowConfigurationService: Sync + Send { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[async_trait::async_trait] +pub trait FlowConfigurationServiceExt { + async fn try_get_flow_schedule( + &self, + flow_key: FlowKey, + ) -> Result, FindFlowConfigurationError>; + + async fn try_get_dataset_transform_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result, FindFlowConfigurationError>; + + async fn try_get_dataset_ingest_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result, FindFlowConfigurationError>; + + async fn try_get_dataset_compaction_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result, FindFlowConfigurationError>; + + async fn try_get_dataset_reset_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> 
Result<Option<ResetRule>, FindFlowConfigurationError>; + + async fn try_get_config_snapshot_by_key( + &self, + flow_key: FlowKey, + ) -> Result<Option<FlowConfigurationSnapshot>, FindFlowConfigurationError>; +} + +#[async_trait::async_trait] +impl<T: FlowConfigurationService + ?Sized> FlowConfigurationServiceExt for T { + async fn try_get_flow_schedule( + &self, + flow_key: FlowKey, + ) -> Result<Option<Schedule>, FindFlowConfigurationError> { + let maybe_config = self.find_configuration(flow_key).await?; + Ok( + if let Some(config) = maybe_config + && config.is_active() + { + config.try_get_schedule() + } else { + None + }, + ) + } + + async fn try_get_dataset_transform_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result<Option<TransformRule>, FindFlowConfigurationError> { + let maybe_config = self + .find_configuration(FlowKey::dataset(dataset_id, flow_type)) + .await?; + Ok( + if let Some(config) = maybe_config + && config.is_active() + { + config.try_get_transform_rule() + } else { + None + }, + ) + } + + async fn try_get_dataset_ingest_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result<Option<IngestRule>, FindFlowConfigurationError> { + let maybe_config = self + .find_configuration(FlowKey::dataset(dataset_id, flow_type)) + .await?; + Ok( + if let Some(config) = maybe_config + && config.is_active() + { + config.try_get_ingest_rule() + } else { + None + }, + ) + } + + async fn try_get_dataset_compaction_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result<Option<CompactionRule>, FindFlowConfigurationError> { + let maybe_config = self + .find_configuration(FlowKey::dataset(dataset_id, flow_type)) + .await?; + Ok( + if let Some(config) = maybe_config + && config.is_active() + { + config.try_get_compaction_rule() + } else { + None + }, + ) + } + + async fn try_get_dataset_reset_rule( + &self, + dataset_id: DatasetID, + flow_type: DatasetFlowType, + ) -> Result<Option<ResetRule>, FindFlowConfigurationError> { + let maybe_config = self + .find_configuration(FlowKey::dataset(dataset_id, flow_type)) + .await?; + Ok( + if let Some(config) = maybe_config + && config.is_active() + { + config.try_get_reset_rule() + } else { + None + }, + ) + } + + async fn try_get_config_snapshot_by_key( + &self, + flow_key: FlowKey, + ) -> Result<Option<FlowConfigurationSnapshot>, FindFlowConfigurationError> { + let maybe_snapshot = match flow_key { + FlowKey::System(_) => self + .try_get_flow_schedule(flow_key) + .await? + .map(FlowConfigurationSnapshot::Schedule), + FlowKey::Dataset(dataset_flow_key) => match dataset_flow_key.flow_type { + DatasetFlowType::ExecuteTransform => self + .try_get_dataset_transform_rule( + dataset_flow_key.dataset_id, + dataset_flow_key.flow_type, + ) + .await? + .map(FlowConfigurationSnapshot::Transform), + DatasetFlowType::Ingest => self + .try_get_dataset_ingest_rule( + dataset_flow_key.dataset_id, + dataset_flow_key.flow_type, + ) + .await? + .map(FlowConfigurationSnapshot::Ingest), + DatasetFlowType::Reset => self + .try_get_dataset_reset_rule( + dataset_flow_key.dataset_id, + dataset_flow_key.flow_type, + ) + .await? + .map(FlowConfigurationSnapshot::Reset), + DatasetFlowType::HardCompaction => self + .try_get_dataset_compaction_rule( + dataset_flow_key.dataset_id, + dataset_flow_key.flow_type, + ) + .await? 
+ .map(FlowConfigurationSnapshot::Compaction), + }, + }; + + Ok(maybe_snapshot) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(thiserror::Error, Debug)] pub enum SetFlowConfigurationError { #[error(transparent)] diff --git a/src/domain/flow-system/services/src/dependencies.rs b/src/domain/flow-system/services/src/dependencies.rs new file mode 100644 index 000000000..dfa020574 --- /dev/null +++ b/src/domain/flow-system/services/src/dependencies.rs @@ -0,0 +1,26 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use dill::CatalogBuilder; + +use crate::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn register_dependencies(catalog_builder: &mut CatalogBuilder) { + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); + + catalog_builder.add::(); + catalog_builder.add::(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/active_configs_state.rs b/src/domain/flow-system/services/src/flow/active_configs_state.rs deleted file mode 100644 index 38c9975fa..000000000 --- a/src/domain/flow-system/services/src/flow/active_configs_state.rs +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
- -use std::collections::HashMap; - -use kamu_flow_system::*; -use opendatafabric::DatasetID; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Default)] -pub(crate) struct ActiveConfigsState { - system_schedules: HashMap, - dataset_transform_rules: HashMap, - dataset_reset_rules: HashMap, - dataset_compaction_rules: HashMap, - dataset_ingest_rules: HashMap, -} - -impl ActiveConfigsState { - pub fn add_dataset_flow_config( - &mut self, - flow_key: &FlowKeyDataset, - rule: FlowConfigurationRule, - ) { - let key = flow_key.clone(); - match rule { - FlowConfigurationRule::Schedule(_) => { - unreachable!() - } - FlowConfigurationRule::IngestRule(ingest_rule) => { - self.dataset_ingest_rules.insert(key, ingest_rule); - } - FlowConfigurationRule::ResetRule(reset) => { - self.dataset_reset_rules.insert(key, reset); - } - FlowConfigurationRule::TransformRule(transform) => { - self.dataset_transform_rules.insert(key, transform); - } - FlowConfigurationRule::CompactionRule(compaction) => { - self.dataset_compaction_rules.insert(key, compaction); - } - } - } - - pub fn add_system_flow_config(&mut self, flow_type: SystemFlowType, schedule: Schedule) { - self.system_schedules.insert(flow_type, schedule); - } - - pub fn drop_dataset_configs(&mut self, dataset_id: &DatasetID) { - for flow_type in DatasetFlowType::all() { - self.drop_dataset_flow_config(BorrowedFlowKeyDataset::new(dataset_id, *flow_type)); - } - } - - pub fn drop_flow_config(&mut self, flow_key: &FlowKey) { - match flow_key { - FlowKey::Dataset(flow_key) => { - self.drop_dataset_flow_config(flow_key.borrowed_key()); - } - FlowKey::System(flow_key) => { - self.system_schedules.remove(&flow_key.flow_type); - } - } - } - - fn drop_dataset_flow_config(&mut self, flow_key: BorrowedFlowKeyDataset) { - self.dataset_ingest_rules.remove(flow_key.as_trait()); - self.dataset_transform_rules.remove(flow_key.as_trait()); - self.dataset_compaction_rules.remove(flow_key.as_trait()); - self.dataset_reset_rules.remove(flow_key.as_trait()); - } - - pub fn try_get_flow_schedule(&self, flow_key: &FlowKey) -> Option { - match flow_key { - FlowKey::Dataset(flow_key) => self - .dataset_ingest_rules - .get( - BorrowedFlowKeyDataset::new(&flow_key.dataset_id, flow_key.flow_type) - .as_trait(), - ) - .map(|ingest_rule| ingest_rule.schedule_condition.clone()), - FlowKey::System(flow_key) => self.system_schedules.get(&flow_key.flow_type).cloned(), - } - } - - pub fn try_get_dataset_transform_rule( - &self, - dataset_id: &DatasetID, - flow_type: DatasetFlowType, - ) -> Option { - self.dataset_transform_rules - .get(BorrowedFlowKeyDataset::new(dataset_id, flow_type).as_trait()) - .copied() - } - - pub fn try_get_dataset_ingest_rule( - &self, - dataset_id: &DatasetID, - flow_type: DatasetFlowType, - ) -> Option { - self.dataset_ingest_rules - .get(BorrowedFlowKeyDataset::new(dataset_id, flow_type).as_trait()) - .cloned() - } - - pub fn try_get_dataset_compaction_rule( - &self, - dataset_id: &DatasetID, - flow_type: DatasetFlowType, - ) -> Option { - self.dataset_compaction_rules - .get(BorrowedFlowKeyDataset::new(dataset_id, flow_type).as_trait()) - .copied() - } - - pub fn try_get_dataset_reset_rule( - &self, - dataset_id: &DatasetID, - flow_type: DatasetFlowType, - ) -> Option { - self.dataset_reset_rules - .get(BorrowedFlowKeyDataset::new(dataset_id, flow_type).as_trait()) - .cloned() - } - - pub fn try_get_config_snapshot_by_key( - &self, - flow_key: &FlowKey, - ) -> Option { - 
match flow_key { - FlowKey::System(_) => self - .try_get_flow_schedule(flow_key) - .map(FlowConfigurationSnapshot::Schedule), - FlowKey::Dataset(dataset_flow_key) => match dataset_flow_key.flow_type { - DatasetFlowType::ExecuteTransform => self - .try_get_dataset_transform_rule( - &dataset_flow_key.dataset_id, - dataset_flow_key.flow_type, - ) - .map(FlowConfigurationSnapshot::Transform), - DatasetFlowType::Ingest => self - .try_get_dataset_ingest_rule( - &dataset_flow_key.dataset_id, - dataset_flow_key.flow_type, - ) - .map(FlowConfigurationSnapshot::Ingest), - DatasetFlowType::Reset => self - .try_get_dataset_reset_rule( - &dataset_flow_key.dataset_id, - dataset_flow_key.flow_type, - ) - .map(FlowConfigurationSnapshot::Reset), - DatasetFlowType::HardCompaction => self - .try_get_dataset_compaction_rule( - &dataset_flow_key.dataset_id, - dataset_flow_key.flow_type, - ) - .map(FlowConfigurationSnapshot::Compaction), - }, - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_abort_helper.rs b/src/domain/flow-system/services/src/flow/flow_abort_helper.rs new file mode 100644 index 000000000..b526c4eaa --- /dev/null +++ b/src/domain/flow-system/services/src/flow/flow_abort_helper.rs @@ -0,0 +1,65 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::sync::Arc; + +use dill::component; +use internal_error::{InternalError, ResultIntoInternal}; +use kamu_flow_system::{Flow, FlowEventStore, FlowID}; +use kamu_task_system::TaskScheduler; +use time_source::SystemTimeSource; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) struct FlowAbortHelper { + flow_event_store: Arc, + time_source: Arc, + task_scheduler: Arc, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +impl FlowAbortHelper { + pub(crate) fn new( + flow_event_store: Arc, + time_source: Arc, + task_scheduler: Arc, + ) -> Self { + Self { + flow_event_store, + time_source, + task_scheduler, + } + } + + pub(crate) async fn abort_flow(&self, flow_id: FlowID) -> Result<(), InternalError> { + // Mark flow as aborted + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .int_err()?; + + self.abort_loaded_flow(&mut flow).await + } + + pub(crate) async fn abort_loaded_flow(&self, flow: &mut Flow) -> Result<(), InternalError> { + // Abort flow itself + flow.abort(self.time_source.now()).int_err()?; + flow.save(self.flow_event_store.as_ref()).await.int_err()?; + + // Cancel associated tasks + for task_id in &flow.task_ids { + self.task_scheduler.cancel_task(*task_id).await.int_err()?; + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_enqueue_helper.rs b/src/domain/flow-system/services/src/flow/flow_enqueue_helper.rs new file mode 100644 index 000000000..07ba1ed5c --- /dev/null +++ 
b/src/domain/flow-system/services/src/flow/flow_enqueue_helper.rs @@ -0,0 +1,665 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use dill::component; +use internal_error::InternalError; +use kamu_core::{DatasetChangesService, DatasetOwnershipService, DependencyGraphService}; +use kamu_flow_system::*; +use time_source::SystemTimeSource; + +use super::{DownstreamDependencyFlowPlan, FlowTriggerContext}; +use crate::DownstreamDependencyTriggerType; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) struct FlowEnqueueHelper { + flow_timewheel_service: Arc, + flow_event_store: Arc, + flow_configuration_service: Arc, + dataset_changes_service: Arc, + dependency_graph_service: Arc, + dataset_ownership_service: Arc, + time_source: Arc, + executor_config: Arc, +} + +#[component(pub)] +impl FlowEnqueueHelper { + pub(crate) fn new( + flow_timewheel_service: Arc, + flow_event_store: Arc, + flow_configuration_service: Arc, + dataset_changes_service: Arc, + dependency_graph_service: Arc, + dataset_ownership_service: Arc, + time_source: Arc, + executor_config: Arc, + ) -> Self { + Self { + flow_timewheel_service, + flow_event_store, + flow_configuration_service, + dataset_changes_service, + dependency_graph_service, + dataset_ownership_service, + time_source, + executor_config, + } + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?rule))] + pub(crate) async fn activate_flow_configuration( + &self, + start_time: DateTime, + flow_key: FlowKey, + rule: FlowConfigurationRule, + ) -> Result<(), InternalError> { + match &flow_key { + FlowKey::Dataset(_) => { + match &rule { + FlowConfigurationRule::TransformRule(_) => { + self.enqueue_auto_polling_flow_unconditionally(start_time, &flow_key) + .await?; + } + FlowConfigurationRule::IngestRule(ingest_rule) => { + self.enqueue_scheduled_auto_polling_flow( + start_time, + &flow_key, + &ingest_rule.schedule_condition, + ) + .await?; + } + // Such as compaction and reset is very dangerous operation we + // skip running it during activation flow configurations. 
+ // And schedule will be used only for system flows + FlowConfigurationRule::CompactionRule(_) + | FlowConfigurationRule::Schedule(_) + | FlowConfigurationRule::ResetRule(_) => (), + } + } + FlowKey::System(_) => { + if let FlowConfigurationRule::Schedule(schedule) = &rule { + self.enqueue_scheduled_auto_polling_flow(start_time, &flow_key, schedule) + .await?; + } else { + unimplemented!( + "Doubt will ever need to schedule system flows via batching rules" + ) + } + } + } + + Ok(()) + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow_key))] + pub(crate) async fn try_enqueue_scheduled_auto_polling_flow_if_enabled( + &self, + start_time: DateTime, + flow_key: &FlowKey, + ) -> Result<(), InternalError> { + let maybe_active_schedule = self + .flow_configuration_service + .try_get_flow_schedule(flow_key.clone()) + .await + .int_err()?; + + if let Some(active_schedule) = maybe_active_schedule { + self.enqueue_scheduled_auto_polling_flow(start_time, flow_key, &active_schedule) + .await?; + } + + Ok(()) + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow.flow_key, %flow.flow_id, ))] + pub(crate) async fn enqueue_dependent_flows( + &self, + input_success_time: DateTime, + flow: &Flow, + flow_result: &FlowResult, + ) -> Result<(), InternalError> { + if let FlowKey::Dataset(fk_dataset) = &flow.flow_key { + let dependent_dataset_flow_plans = self + .make_downstream_dependencies_flow_plans(fk_dataset, flow.config_snapshot.as_ref()) + .await?; + if dependent_dataset_flow_plans.is_empty() { + return Ok(()); + } + let trigger = FlowTrigger::InputDatasetFlow(FlowTriggerInputDatasetFlow { + trigger_time: input_success_time, + dataset_id: fk_dataset.dataset_id.clone(), + flow_type: fk_dataset.flow_type, + flow_id: flow.flow_id, + flow_result: flow_result.clone(), + }); + // For each, trigger needed flow + for dependent_dataset_flow_plan in dependent_dataset_flow_plans { + self.trigger_flow_common( + &dependent_dataset_flow_plan.flow_key, + trigger.clone(), + dependent_dataset_flow_plan.flow_trigger_context, + dependent_dataset_flow_plan.maybe_config_snapshot, + ) + .await?; + } + + Ok(()) + } else { + unreachable!("Not expecting other types of flow keys than dataset"); + } + } + + async fn make_downstream_dependencies_flow_plans( + &self, + fk_dataset: &FlowKeyDataset, + maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, + ) -> Result, InternalError> { + // ToDo: extend dependency graph with possibility to fetch downstream + // dependencies by owner + use futures::StreamExt; + let dependent_dataset_ids: Vec<_> = self + .dependency_graph_service + .get_downstream_dependencies(&fk_dataset.dataset_id) + .await + .int_err()? + .collect() + .await; + + let mut plans: Vec = vec![]; + if dependent_dataset_ids.is_empty() { + return Ok(plans); + } + + match self.classify_dependent_trigger_type(fk_dataset.flow_type, maybe_config_snapshot) { + DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform => { + for dataset_id in dependent_dataset_ids { + if let Some(transform_rule) = self + .flow_configuration_service + .try_get_dataset_transform_rule( + dataset_id.clone(), + DatasetFlowType::ExecuteTransform, + ) + .await + .int_err()? 
+ { + plans.push(DownstreamDependencyFlowPlan { + flow_key: FlowKeyDataset::new( + dataset_id, + DatasetFlowType::ExecuteTransform, + ) + .into(), + flow_trigger_context: FlowTriggerContext::Batching(transform_rule), + maybe_config_snapshot: None, + }); + }; + } + } + + DownstreamDependencyTriggerType::TriggerOwnHardCompaction => { + let dataset_owner_account_ids = self + .dataset_ownership_service + .get_dataset_owners(&fk_dataset.dataset_id) + .await?; + + for dependent_dataset_id in dependent_dataset_ids { + for owner_account_id in &dataset_owner_account_ids { + if self + .dataset_ownership_service + .is_dataset_owned_by(&dependent_dataset_id, owner_account_id) + .await? + { + plans.push(DownstreamDependencyFlowPlan { + flow_key: FlowKeyDataset::new( + dependent_dataset_id.clone(), + DatasetFlowType::HardCompaction, + ) + .into(), + flow_trigger_context: FlowTriggerContext::Unconditional, + // Currently we trigger Hard compaction recursively only in keep + // metadata only mode + maybe_config_snapshot: Some(FlowConfigurationSnapshot::Compaction( + CompactionRule::MetadataOnly(CompactionRuleMetadataOnly { + recursive: true, + }), + )), + }); + break; + } + } + } + } + + DownstreamDependencyTriggerType::Empty => {} + } + + Ok(plans) + } + + fn classify_dependent_trigger_type( + &self, + dataset_flow_type: DatasetFlowType, + maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, + ) -> DownstreamDependencyTriggerType { + match dataset_flow_type { + DatasetFlowType::Ingest | DatasetFlowType::ExecuteTransform => { + DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform + } + DatasetFlowType::HardCompaction => { + if let Some(config_snapshot) = &maybe_config_snapshot + && let FlowConfigurationSnapshot::Compaction(compaction_rule) = config_snapshot + { + if compaction_rule.recursive() { + DownstreamDependencyTriggerType::TriggerOwnHardCompaction + } else { + DownstreamDependencyTriggerType::Empty + } + } else { + DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform + } + } + DatasetFlowType::Reset => { + if let Some(config_snapshot) = &maybe_config_snapshot + && let FlowConfigurationSnapshot::Reset(reset_rule) = config_snapshot + && reset_rule.recursive + { + DownstreamDependencyTriggerType::TriggerOwnHardCompaction + } else { + DownstreamDependencyTriggerType::Empty + } + } + } + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?schedule))] + pub(crate) async fn enqueue_scheduled_auto_polling_flow( + &self, + start_time: DateTime, + flow_key: &FlowKey, + schedule: &Schedule, + ) -> Result { + self.trigger_flow_common( + flow_key, + FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: start_time, + }), + FlowTriggerContext::Scheduled(schedule.clone()), + None, + ) + .await + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow_key))] + pub(crate) async fn enqueue_auto_polling_flow_unconditionally( + &self, + start_time: DateTime, + flow_key: &FlowKey, + ) -> Result { + // Very similar to manual trigger, but automatic reasons + self.trigger_flow_common( + flow_key, + FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: start_time, + }), + FlowTriggerContext::Unconditional, + None, + ) + .await + } + + pub(crate) async fn trigger_flow_common( + &self, + flow_key: &FlowKey, + trigger: FlowTrigger, + context: FlowTriggerContext, + config_snapshot_maybe: Option, + ) -> Result { + // Query previous runs stats to determine activation time + let flow_run_stats = self.flow_run_stats(flow_key).await?; + + // 
Flows may not be attempted more frequent than mandatory throttling period. + // If flow has never run before, let it go without restriction. + let trigger_time = trigger.trigger_time(); + let mut throttling_boundary_time = + flow_run_stats.last_attempt_time.map_or(trigger_time, |t| { + t + self.executor_config.mandatory_throttling_period + }); + // It's also possible we are waiting for some start condition much longer.. + if throttling_boundary_time < trigger_time { + throttling_boundary_time = trigger_time; + } + + // Is a pending flow present for this config? + match self.find_pending_flow(flow_key).await? { + // Already pending flow + Some(flow_id) => { + // Load, merge triggers, update activation time + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .int_err()?; + + // Only merge unique triggers, ignore identical + flow.add_trigger_if_unique(self.time_source.now(), trigger) + .int_err()?; + + match context { + FlowTriggerContext::Batching(transform_rule) => { + // Is this rule still waited? + if matches!(flow.start_condition, Some(FlowStartCondition::Batching(_))) { + self.evaluate_flow_transform_rule( + trigger_time, + &mut flow, + &transform_rule, + throttling_boundary_time, + ) + .await?; + } else { + // Skip, the flow waits for something else + } + } + FlowTriggerContext::Scheduled(_) | FlowTriggerContext::Unconditional => { + // Evaluate throttling condition: is new time earlier than planned? + let planned_time = self + .find_planned_flow_activation_time(flow.flow_id) + .expect("Flow expected to have activation time by now"); + + if throttling_boundary_time < planned_time { + // If so, enqueue the flow earlier + self.enqueue_flow(flow.flow_id, throttling_boundary_time); + + // Indicate throttling, if applied + if throttling_boundary_time > trigger_time { + self.indicate_throttling_activity( + &mut flow, + throttling_boundary_time, + trigger_time, + )?; + } + } + } + } + + flow.save(self.flow_event_store.as_ref()).await.int_err()?; + Ok(flow.into()) + } + + // Otherwise, initiate a new flow, and enqueue it in the time wheel + None => { + // Initiate new flow + let config_snapshot_maybe = if config_snapshot_maybe.is_some() { + config_snapshot_maybe + } else { + self.flow_configuration_service + .try_get_config_snapshot_by_key(flow_key.clone()) + .await + .int_err()? 
+ }; + let mut flow = self + .make_new_flow( + self.flow_event_store.as_ref(), + flow_key.clone(), + trigger, + config_snapshot_maybe, + ) + .await?; + + match context { + FlowTriggerContext::Batching(transform_rule) => { + // Don't activate if batching condition not satisfied + self.evaluate_flow_transform_rule( + trigger_time, + &mut flow, + &transform_rule, + throttling_boundary_time, + ) + .await?; + } + FlowTriggerContext::Scheduled(schedule) => { + // Next activation time depends on: + // - last success time, if ever launched + // - schedule, if defined + let naive_next_activation_time = schedule + .next_activation_time(trigger_time, flow_run_stats.last_success_time); + + // Apply throttling boundary + let next_activation_time = + std::cmp::max(throttling_boundary_time, naive_next_activation_time); + self.enqueue_flow(flow.flow_id, next_activation_time); + + // Set throttling activity as start condition + if throttling_boundary_time > naive_next_activation_time { + self.indicate_throttling_activity( + &mut flow, + throttling_boundary_time, + naive_next_activation_time, + )?; + } else if naive_next_activation_time > trigger_time { + // Set waiting according to the schedule + flow.set_relevant_start_condition( + self.time_source.now(), + FlowStartCondition::Schedule(FlowStartConditionSchedule { + wake_up_at: naive_next_activation_time, + }), + ) + .int_err()?; + } + } + FlowTriggerContext::Unconditional => { + // Apply throttling boundary + let next_activation_time = + std::cmp::max(throttling_boundary_time, trigger_time); + self.enqueue_flow(flow.flow_id, next_activation_time); + + // Set throttling activity as start condition + if throttling_boundary_time > trigger_time { + self.indicate_throttling_activity( + &mut flow, + throttling_boundary_time, + trigger_time, + )?; + } + } + } + + flow.save(self.flow_event_store.as_ref()).await.int_err()?; + Ok(flow.into()) + } + } + } + + async fn evaluate_flow_transform_rule( + &self, + evaluation_time: DateTime, + flow: &mut Flow, + transform_rule: &TransformRule, + throttling_boundary_time: DateTime, + ) -> Result<(), InternalError> { + assert!(matches!( + flow.flow_key.get_type(), + AnyFlowType::Dataset( + DatasetFlowType::ExecuteTransform | DatasetFlowType::HardCompaction + ) + )); + + // TODO: it's likely assumed the accumulation is per each input separately, but + // for now count overall number + let mut accumulated_records_count = 0; + let mut watermark_modified = false; + let mut is_compacted = false; + + // Scan each accumulated trigger to decide + for trigger in &flow.triggers { + if let FlowTrigger::InputDatasetFlow(trigger) = trigger { + match &trigger.flow_result { + FlowResult::Empty | FlowResult::DatasetReset(_) => {} + FlowResult::DatasetCompact(_) => { + is_compacted = true; + } + FlowResult::DatasetUpdate(update) => { + // Compute increment since the first trigger by this dataset. + // Note: there might have been multiple updates since that time. + // We are only recording the first trigger of particular dataset. 
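// Illustrative example of the batching condition evaluated below (numbers assumed, not
// taken from any real configuration): with `min_records_to_await = 100` and
// `max_batching_interval = 1h`, a derived flow first triggered at 10:00 with 40 new input
// records keeps waiting; if later triggers bring the accumulated count to 100 or more, the
// condition is satisfied at that evaluation, otherwise the flow is enqueued for the 11:00
// batching deadline, provided at least some records or a watermark change accumulated
// (throttling may still push the activation later).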
+ if let FlowResultDatasetUpdate::Changed(update_result) = update { + let increment = self + .dataset_changes_service + .get_increment_since( + &trigger.dataset_id, + update_result.old_head.as_ref(), + ) + .await + .int_err()?; + + accumulated_records_count += increment.num_records; + watermark_modified |= increment.updated_watermark.is_some(); + } + } + } + } + } + + // The timeout for batching will happen at: + let batching_deadline = + flow.primary_trigger().trigger_time() + *transform_rule.max_batching_interval(); + + // Accumulated something if at least some input changed or watermark was touched + let accumulated_something = accumulated_records_count > 0 || watermark_modified; + + // The condition is satisfied if + // - we crossed the number of new records thresholds + // - or waited long enough, assuming + // - there is at least some change of the inputs + // - watermark got touched + let satisfied = accumulated_something + && (accumulated_records_count >= transform_rule.min_records_to_await() + || evaluation_time >= batching_deadline); + + // Set batching condition data, but only during the first rule evaluation. + if !matches!( + flow.start_condition.as_ref(), + Some(FlowStartCondition::Batching(_)) + ) { + flow.set_relevant_start_condition( + self.time_source.now(), + FlowStartCondition::Batching(FlowStartConditionBatching { + active_transform_rule: *transform_rule, + batching_deadline, + }), + ) + .int_err()?; + } + + // If we accumulated at least something (records or watermarks), + // the upper bound of potential finish time for batching is known + if accumulated_something || is_compacted { + // Finish immediately if satisfied, or not later than the deadline + let batching_finish_time = if satisfied || is_compacted { + evaluation_time + } else { + batching_deadline + }; + + // Throttling boundary correction + let corrected_finish_time = + std::cmp::max(batching_finish_time, throttling_boundary_time); + + let should_activate = match self.find_planned_flow_activation_time(flow.flow_id) { + Some(activation_time) => activation_time > corrected_finish_time, + None => true, + }; + if should_activate { + self.enqueue_flow(flow.flow_id, corrected_finish_time); + } + + // If batching is over, it's start condition is no longer valid. 
+ // However, set throttling condition, if it applies + if (satisfied || is_compacted) && throttling_boundary_time > batching_finish_time { + self.indicate_throttling_activity( + flow, + throttling_boundary_time, + batching_finish_time, + )?; + } + } + + Ok(()) + } + + fn indicate_throttling_activity( + &self, + flow: &mut Flow, + wake_up_at: DateTime, + shifted_from: DateTime, + ) -> Result<(), InternalError> { + flow.set_relevant_start_condition( + self.time_source.now(), + FlowStartCondition::Throttling(FlowStartConditionThrottling { + interval: self.executor_config.mandatory_throttling_period, + wake_up_at, + shifted_from, + }), + ) + .int_err()?; + Ok(()) + } + + async fn find_pending_flow(&self, flow_key: &FlowKey) -> Result, InternalError> { + self.flow_event_store.try_get_pending_flow(flow_key).await + } + + #[inline] + fn find_planned_flow_activation_time(&self, flow_id: FlowID) -> Option> { + self.flow_timewheel_service + .get_planned_flow_activation_time(flow_id) + } + + #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?trigger))] + async fn make_new_flow( + &self, + flow_event_store: &dyn FlowEventStore, + flow_key: FlowKey, + trigger: FlowTrigger, + config_snapshot: Option, + ) -> Result { + let flow = Flow::new( + self.time_source.now(), + flow_event_store.new_flow_id().await?, + flow_key, + trigger, + config_snapshot, + ); + + Ok(flow) + } + + async fn flow_run_stats(&self, flow_key: &FlowKey) -> Result { + match flow_key { + FlowKey::Dataset(fk_dataset) => { + self.flow_event_store + .get_dataset_flow_run_stats(&fk_dataset.dataset_id, fk_dataset.flow_type) + .await + } + FlowKey::System(fk_system) => { + self.flow_event_store + .get_system_flow_run_stats(fk_system.flow_type) + .await + } + } + } + + #[tracing::instrument(level = "trace", skip_all, fields(%flow_id, %activation_time))] + fn enqueue_flow(&self, flow_id: FlowID, activation_time: DateTime) { + self.flow_timewheel_service + .activate_at(activation_time, flow_id); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_executor_impl.rs b/src/domain/flow-system/services/src/flow/flow_executor_impl.rs new file mode 100644 index 000000000..09dfc25cb --- /dev/null +++ b/src/domain/flow-system/services/src/flow/flow_executor_impl.rs @@ -0,0 +1,726 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
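// A minimal startup sketch, assuming the executor is resolved from a dill catalog that was
// built via `register_dependencies()`; the wiring and function name here are illustrative.
// The point is the ordering: `pre_run()` (time-wheel recovery and auto-polling restoration)
// must complete before the endless `run()` scheduling loop starts.
async fn start_flow_executor(catalog: &dill::Catalog) -> Result<(), internal_error::InternalError> {
    let executor = catalog
        .get_one::<dyn kamu_flow_system::FlowExecutor>()
        .unwrap();
    executor.pre_run(chrono::Utc::now()).await?; // recover state, restore auto-polling flows
    executor.run().await // main scheduling loop
}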
+ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use database_common::{DatabaseTransactionRunner, PaginationOpts}; +use dill::*; +use futures::TryStreamExt; +use internal_error::InternalError; +use kamu_core::{DatasetLifecycleMessage, MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE}; +use kamu_flow_system::*; +use kamu_task_system::*; +use messaging_outbox::{ + MessageConsumer, + MessageConsumerMeta, + MessageConsumerT, + MessageConsumptionDurability, + Outbox, + OutboxExt, +}; +use time_source::SystemTimeSource; + +use crate::{ + FlowAbortHelper, + FlowEnqueueHelper, + MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR, + MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct FlowExecutorImpl { + catalog: Catalog, + flow_time_wheel_service: Arc, + time_source: Arc, + executor_config: Arc, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn FlowExecutor)] +#[interface(dyn FlowExecutorTestDriver)] +#[interface(dyn MessageConsumer)] +#[interface(dyn MessageConsumerT)] +#[interface(dyn MessageConsumerT)] +#[interface(dyn MessageConsumerT)] +#[meta(MessageConsumerMeta { + consumer_name: MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR, + feeding_producers: &[ + MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, + MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE + ], + durability: MessageConsumptionDurability::Durable, +})] +#[scope(Singleton)] +impl FlowExecutorImpl { + pub fn new( + catalog: Catalog, + flow_time_wheel_service: Arc, + time_source: Arc, + executor_config: Arc, + ) -> Self { + Self { + catalog, + flow_time_wheel_service, + time_source, + executor_config, + } + } + + #[tracing::instrument(level = "debug", skip_all)] + async fn run_current_timeslot( + &self, + target_catalog: &Catalog, + timeslot_time: DateTime, + ) -> Result<(), InternalError> { + let planned_flow_ids: Vec<_> = self.flow_time_wheel_service.take_nearest_planned_flows(); + + let mut planned_task_futures = Vec::new(); + for planned_flow_id in planned_flow_ids { + let target_catalog = target_catalog.clone(); + let flow_event_store: Arc = + target_catalog.get_one::().unwrap(); + + planned_task_futures.push(async move { + let mut flow = Flow::load(planned_flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + self.schedule_flow_task(target_catalog, &mut flow, timeslot_time) + .await?; + Ok(()) + }); + } + + let results = futures::future::join_all(planned_task_futures).await; + results + .into_iter() + .filter(Result::is_err) + .map(|e| e.err().unwrap()) + .for_each(|e: InternalError| { + tracing::error!(error=?e, "Scheduling flow failed"); + }); + + Ok(()) + } + + #[tracing::instrument(level = "debug", skip_all)] + async fn recover_time_wheel( + &self, + target_catalog: &Catalog, + start_time: DateTime, + ) -> Result<(), InternalError> { + // Extract necessary dependencies + let flow_event_store = target_catalog.get_one::().unwrap(); + let flow_timewheel_service = target_catalog + .get_one::() + .unwrap(); + let enqueue_helper = target_catalog.get_one::().unwrap(); + + // How many waiting flows do we have? 
+ let waiting_filters = AllFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + by_initiator: None, + }; + let total_waiting_flows = flow_event_store + .get_count_all_flows(&waiting_filters) + .await?; + + // For each waiting flow, check if it should contribute to time wheel + // Load them in pages + let mut processed_waiting_flows = 0; + while processed_waiting_flows < total_waiting_flows { + // Another page + let waiting_flow_ids: Vec<_> = flow_event_store + .get_all_flow_ids( + &waiting_filters, + PaginationOpts { + offset: processed_waiting_flows, + limit: 100, + }, + ) + .try_collect() + .await?; + + // Process each waiting flow + for waiting_flow_id in &waiting_flow_ids { + // TODO: batch loading of flows + let flow = Flow::load(*waiting_flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + + // We are not interested in flows with scheduled tasks, + // as restoring these will be handled by TaskExecutor. + if let Some(start_condition) = &flow.start_condition { + // We have to recover wakeup for scheduled/throttling condition + if let Some(wakeup_time) = start_condition.wake_up_at() { + let mut activation_time = wakeup_time; + if activation_time < start_time { + activation_time = start_time; + } + flow_timewheel_service.activate_at(activation_time, *waiting_flow_id); + } + // and we also need to re-evaluate the batching condition + else if let FlowStartCondition::Batching(b) = start_condition { + enqueue_helper + .trigger_flow_common( + &flow.flow_key, + FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: start_time, + }), + FlowTriggerContext::Batching(b.active_transform_rule), + None, + ) + .await?; + } + } + } + + processed_waiting_flows += waiting_flow_ids.len(); + } + + Ok(()) + } + + #[tracing::instrument(level = "debug", skip_all)] + async fn restore_auto_polling_flows_from_configurations( + &self, + target_catalog: &Catalog, + start_time: DateTime, + ) -> Result<(), InternalError> { + let flow_configuration_service = target_catalog + .get_one::() + .unwrap(); + let flow_event_store = target_catalog.get_one::().unwrap(); + + // Query all enabled flow configurations + let enabled_configurations: Vec<_> = flow_configuration_service + .list_enabled_configurations() + .try_collect() + .await?; + + // Split configs by those which have a schedule or different rules + let (schedule_configs, non_schedule_configs): (Vec<_>, Vec<_>) = enabled_configurations + .into_iter() + .partition(|config| matches!(config.rule, FlowConfigurationRule::Schedule(_))); + + let enqueue_helper = target_catalog.get_one::().unwrap(); + + // Activate all configs, ensuring schedule configs precedes non-schedule configs + // (this i.e. 
forces all root datasets to be updated earlier than the derived) + // + // Thought: maybe we need topological sorting by derived relations as well to + // optimize the initial execution order, but batching rules may work just fine + for enabled_config in schedule_configs + .into_iter() + .chain(non_schedule_configs.into_iter()) + { + // Do not re-trigger the flow that has already triggered + let maybe_pending_flow_id = flow_event_store + .try_get_pending_flow(&enabled_config.flow_key) + .await?; + if maybe_pending_flow_id.is_none() { + enqueue_helper + .activate_flow_configuration( + start_time, + enabled_config.flow_key, + enabled_config.rule, + ) + .await?; + } + } + + Ok(()) + } + + #[tracing::instrument(level = "trace", skip_all, fields(flow_id = %flow.flow_id))] + async fn schedule_flow_task( + &self, + target_catalog: Catalog, + flow: &mut Flow, + schedule_time: DateTime, + ) -> Result { + let logical_plan = + self.make_task_logical_plan(&flow.flow_key, flow.config_snapshot.as_ref())?; + + let task_scheduler = target_catalog.get_one::().unwrap(); + let task = task_scheduler + .create_task( + logical_plan, + Some(TaskMetadata::from(vec![( + METADATA_TASK_FLOW_ID, + flow.flow_id.to_string(), + )])), + ) + .await + .int_err()?; + + flow.set_relevant_start_condition( + schedule_time, + FlowStartCondition::Executor(FlowStartConditionExecutor { + task_id: task.task_id, + }), + ) + .int_err()?; + + flow.on_task_scheduled(schedule_time, task.task_id) + .int_err()?; + + let flow_event_store = target_catalog.get_one::().unwrap(); + flow.save(flow_event_store.as_ref()).await.int_err()?; + + Ok(task.task_id) + } + + /// Creates task logical plan that corresponds to template + pub fn make_task_logical_plan( + &self, + flow_key: &FlowKey, + maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, + ) -> Result { + match flow_key { + FlowKey::Dataset(flow_key) => match flow_key.flow_type { + DatasetFlowType::Ingest | DatasetFlowType::ExecuteTransform => { + let mut fetch_uncacheable = false; + if let Some(config_snapshot) = maybe_config_snapshot + && let FlowConfigurationSnapshot::Ingest(ingest_rule) = config_snapshot + { + fetch_uncacheable = ingest_rule.fetch_uncacheable; + } + Ok(LogicalPlan::UpdateDataset(UpdateDataset { + dataset_id: flow_key.dataset_id.clone(), + fetch_uncacheable, + })) + } + DatasetFlowType::HardCompaction => { + let mut max_slice_size: Option = None; + let mut max_slice_records: Option = None; + let mut keep_metadata_only = false; + + if let Some(config_snapshot) = maybe_config_snapshot + && let FlowConfigurationSnapshot::Compaction(compaction_rule) = + config_snapshot + { + max_slice_size = compaction_rule.max_slice_size(); + max_slice_records = compaction_rule.max_slice_records(); + keep_metadata_only = + matches!(compaction_rule, CompactionRule::MetadataOnly(_)); + }; + + Ok(LogicalPlan::HardCompactionDataset(HardCompactionDataset { + dataset_id: flow_key.dataset_id.clone(), + max_slice_size, + max_slice_records, + keep_metadata_only, + })) + } + DatasetFlowType::Reset => { + if let Some(config_rule) = maybe_config_snapshot + && let FlowConfigurationSnapshot::Reset(reset_rule) = config_rule + { + return Ok(LogicalPlan::Reset(ResetDataset { + dataset_id: flow_key.dataset_id.clone(), + new_head_hash: reset_rule.new_head_hash.clone(), + old_head_hash: reset_rule.old_head_hash.clone(), + recursive: reset_rule.recursive, + })); + } + InternalError::bail("Reset flow cannot be called without configuration") + } + }, + FlowKey::System(flow_key) => { + match flow_key.flow_type 
{ + // TODO: replace on correct logical plan + SystemFlowType::GC => Ok(LogicalPlan::Probe(Probe { + dataset_id: None, + busy_time: Some(std::time::Duration::from_secs(20)), + end_with_outcome: Some(TaskOutcome::Success(TaskResult::Empty)), + })), + } + } + } + } + + fn flow_id_from_task_metadata( + task_metadata: &TaskMetadata, + ) -> Result, InternalError> { + let maybe_flow_id_property = task_metadata.try_get_property(METADATA_TASK_FLOW_ID); + Ok(match maybe_flow_id_property { + Some(flow_id_property) => Some(FlowID::from(&flow_id_property).int_err()?), + None => None, + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl FlowExecutor for FlowExecutorImpl { + #[tracing::instrument(level = "info", skip_all)] + async fn pre_run(&self, planned_start_time: DateTime) -> Result<(), InternalError> { + let start_time = self.executor_config.round_time(planned_start_time)?; + + // Initial scheduling + DatabaseTransactionRunner::new(self.catalog.clone()) + .transactional(|target_catalog: Catalog| async move { + // Recover already scheduled flows after server restart + self.recover_time_wheel(&target_catalog, start_time).await?; + + // Restore auto polling flows: + // - read active configurations + // - automatically trigger flows, if they are not waiting already + self.restore_auto_polling_flows_from_configurations(&target_catalog, start_time) + .await?; + + // Publish progress event + let outbox = target_catalog.get_one::().unwrap(); + outbox + .post_message( + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + FlowExecutorUpdatedMessage { + update_time: start_time, + update_details: FlowExecutorUpdateDetails::Loaded, + }, + ) + .await?; + + Ok(()) + }) + .await + } + + /// Runs the update main loop + #[tracing::instrument(level = "info", skip_all)] + async fn run(&self) -> Result<(), InternalError> { + // Main scanning loop + let main_loop_span = tracing::debug_span!("FlowExecutor main loop"); + let _ = main_loop_span.enter(); + + loop { + let current_time = self.time_source.now(); + + // Do we have a timeslot scheduled? + let maybe_nearest_activation_time = + self.flow_time_wheel_service.nearest_activation_moment(); + + // Is it time to execute it yet? + if let Some(nearest_activation_time) = maybe_nearest_activation_time + && nearest_activation_time <= current_time + { + DatabaseTransactionRunner::new(self.catalog.clone()) + .transactional(|target_catalog: Catalog| async move { + // Run scheduling for current time slot. 
Should not throw any errors + self.run_current_timeslot(&target_catalog, nearest_activation_time) + .await?; + + // Publish progress event + let outbox = target_catalog.get_one::().unwrap(); + outbox + .post_message( + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + FlowExecutorUpdatedMessage { + update_time: nearest_activation_time, + update_details: FlowExecutorUpdateDetails::ExecutedTimeslot, + }, + ) + .await?; + + Ok(()) + }) + .await?; + } + + self.time_source + .sleep(self.executor_config.awaiting_step) + .await; + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl FlowExecutorTestDriver for FlowExecutorImpl { + /// Pretends it is time to schedule the given flow that was not waiting for + /// anything else + async fn mimic_flow_scheduled( + &self, + target_catalog: &Catalog, + flow_id: FlowID, + schedule_time: DateTime, + ) -> Result { + self.flow_time_wheel_service + .cancel_flow_activation(flow_id) + .int_err()?; + + let flow_event_store = target_catalog.get_one::().unwrap(); + let mut flow = Flow::load(flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + + let task_id = self + .schedule_flow_task(target_catalog.clone(), &mut flow, schedule_time) + .await?; + + Ok(task_id) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl MessageConsumer for FlowExecutorImpl {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl MessageConsumerT for FlowExecutorImpl { + #[tracing::instrument(level = "debug", skip_all, fields(?message))] + async fn consume_message( + &self, + target_catalog: &Catalog, + message: &TaskProgressMessage, + ) -> Result<(), InternalError> { + let flow_event_store = target_catalog.get_one::().unwrap(); + + match message { + TaskProgressMessage::Running(message) => { + // Is this a task associated with flows? + let maybe_flow_id = Self::flow_id_from_task_metadata(&message.task_metadata)?; + if let Some(flow_id) = maybe_flow_id { + let mut flow = Flow::load(flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + flow.on_task_running(message.event_time, message.task_id) + .int_err()?; + flow.save(flow_event_store.as_ref()).await.int_err()?; + + let outbox = target_catalog.get_one::().unwrap(); + outbox + .post_message( + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + FlowExecutorUpdatedMessage { + update_time: message.event_time, + update_details: FlowExecutorUpdateDetails::FlowRunning, + }, + ) + .await?; + } + } + TaskProgressMessage::Finished(message) => { + // Is this a task associated with flows? 
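// (Flows are linked to their tasks through the `METADATA_TASK_FLOW_ID` property that
// `schedule_flow_task()` attaches when creating the task; tasks created outside the flow
// system carry no such property and are skipped here.)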
+ let maybe_flow_id = Self::flow_id_from_task_metadata(&message.task_metadata)?; + if let Some(flow_id) = maybe_flow_id { + let mut flow = Flow::load(flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + flow.on_task_finished( + message.event_time, + message.task_id, + message.outcome.clone(), + ) + .int_err()?; + flow.save(flow_event_store.as_ref()).await.int_err()?; + + let enqueue_helper = target_catalog.get_one::().unwrap(); + + let finish_time = self.executor_config.round_time(message.event_time)?; + + // In case of success: + // - execute followup method + if let Some(flow_result) = flow.try_result_as_ref() + && !flow_result.is_empty() + { + match flow.flow_key.get_type().success_followup_method() { + FlowSuccessFollowupMethod::Ignore => {} + FlowSuccessFollowupMethod::TriggerDependent => { + enqueue_helper + .enqueue_dependent_flows(finish_time, &flow, flow_result) + .await?; + } + } + } + + // In case of success: + // - enqueue next auto-polling flow cycle + if message.outcome.is_success() { + enqueue_helper + .try_enqueue_scheduled_auto_polling_flow_if_enabled( + finish_time, + &flow.flow_key, + ) + .await?; + } + + let outbox = target_catalog.get_one::().unwrap(); + outbox + .post_message( + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + FlowExecutorUpdatedMessage { + update_time: message.event_time, + update_details: FlowExecutorUpdateDetails::FlowFinished, + }, + ) + .await?; + + // TODO: retry logic in case of failed outcome + } + } + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl MessageConsumerT for FlowExecutorImpl { + #[tracing::instrument(level = "debug", skip_all, fields(?message))] + async fn consume_message( + &self, + target_catalog: &Catalog, + message: &FlowConfigurationUpdatedMessage, + ) -> Result<(), InternalError> { + if message.paused { + let maybe_pending_flow_id = { + let flow_event_store = target_catalog.get_one::().unwrap(); + + let maybe_pending_flow_id = flow_event_store + .try_get_pending_flow(&message.flow_key) + .await?; + + if let Some(flow_id) = &maybe_pending_flow_id { + self.flow_time_wheel_service + .cancel_flow_activation(*flow_id) + .int_err()?; + } + maybe_pending_flow_id + }; + + if let Some(flow_id) = maybe_pending_flow_id { + let abort_helper = target_catalog.get_one::().unwrap(); + abort_helper.abort_flow(flow_id).await?; + } + } else { + let enqueue_helper = target_catalog.get_one::().unwrap(); + enqueue_helper + .activate_flow_configuration( + self.executor_config.round_time(message.event_time)?, + message.flow_key.clone(), + message.rule.clone(), + ) + .await?; + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl MessageConsumerT for FlowExecutorImpl { + #[tracing::instrument(level = "debug", skip_all, fields(?message))] + async fn consume_message( + &self, + target_catalog: &Catalog, + message: &DatasetLifecycleMessage, + ) -> Result<(), InternalError> { + match message { + DatasetLifecycleMessage::Deleted(message) => { + let flow_ids_2_abort = { + let flow_event_store = target_catalog.get_one::().unwrap(); + + // For every possible dataset flow: + // - drop queued activations + // - collect ID of aborted flow + let mut flow_ids_2_abort: Vec<_> = + Vec::with_capacity(DatasetFlowType::all().len()); + for flow_type in DatasetFlowType::all() { + if let Some(flow_id) = 
flow_event_store + .try_get_pending_flow(&FlowKey::dataset( + message.dataset_id.clone(), + *flow_type, + )) + .await? + { + flow_ids_2_abort.push(flow_id); + self.flow_time_wheel_service + .cancel_flow_activation(flow_id) + .int_err()?; + } + } + flow_ids_2_abort + }; + + let flow_event_store = target_catalog.get_one::().unwrap(); + + // Abort matched flows + for flow_id in flow_ids_2_abort { + let mut flow = Flow::load(flow_id, flow_event_store.as_ref()) + .await + .int_err()?; + flow.abort(self.time_source.now()).int_err()?; + flow.save(flow_event_store.as_ref()).await.int_err()?; + } + + // Not deleting task->update association, it should be safe. + // Most of the time the outcome of the task will be "Cancelled". + // Even if task squeezes to succeed in between cancellations, + // it's safe: + // - we will record a successful update, no consequence + // - no further updates will be attempted (schedule + // deactivated above) + // - no dependent tasks will be launched (dependency graph + // erases neighbors) + } + + DatasetLifecycleMessage::Created(_) + | DatasetLifecycleMessage::DependenciesUpdated(_) => { + // No action required + } + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Eq, PartialEq)] +pub enum FlowTriggerContext { + Unconditional, + Scheduled(Schedule), + Batching(TransformRule), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Eq, PartialEq)] +pub struct DownstreamDependencyFlowPlan { + pub flow_key: FlowKey, + pub flow_trigger_context: FlowTriggerContext, + pub maybe_config_snapshot: Option, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub enum DownstreamDependencyTriggerType { + TriggerAllEnabledExecuteTransform, + TriggerOwnHardCompaction, + Empty, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs new file mode 100644 index 000000000..adc06d3c9 --- /dev/null +++ b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs @@ -0,0 +1,330 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::collections::HashSet; +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use database_common::PaginationOpts; +use dill::{component, interface, Catalog}; +use futures::TryStreamExt; +use internal_error::ResultIntoInternal; +use kamu_core::DatasetOwnershipService; +use kamu_flow_system::*; +use opendatafabric::{AccountID, DatasetID}; + +use super::FlowTriggerContext; +use crate::{FlowAbortHelper, FlowEnqueueHelper}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct FlowQueryServiceImpl { + catalog: Catalog, + flow_event_store: Arc, + dataset_ownership_service: Arc, + executor_config: Arc, +} + +#[component(pub)] +#[interface(dyn FlowQueryService)] +impl FlowQueryServiceImpl { + pub fn new( + catalog: Catalog, + flow_event_store: Arc, + dataset_ownership_service: Arc, + executor_config: Arc, + ) -> Self { + Self { + catalog, + flow_event_store, + dataset_ownership_service, + executor_config, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl FlowQueryService for FlowQueryServiceImpl { + /// Returns states of flows associated with a given dataset + /// ordered by creation time from newest to oldest + /// Applies specified filters + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id, ?filters, ?pagination))] + async fn list_all_flows_by_dataset( + &self, + dataset_id: &DatasetID, + filters: DatasetFlowFilters, + pagination: PaginationOpts, + ) -> Result { + let total_count = self + .flow_event_store + .get_count_flows_by_dataset(dataset_id, &filters) + .await?; + + let dataset_id = dataset_id.clone(); + + let matched_stream = Box::pin(async_stream::try_stream! 
{ + let relevant_flow_ids: Vec<_> = self + .flow_event_store + .get_all_flow_ids_by_dataset(&dataset_id, &filters, pagination) + .try_collect() + .await?; + + // TODO: implement batch loading + for flow_id in relevant_flow_ids { + let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; + yield flow.into(); + } + }); + + Ok(FlowStateListing { + matched_stream, + total_count, + }) + } + + /// Returns initiators of flows associated with a given dataset + /// ordered by creation time from newest to oldest + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id))] + async fn list_all_flow_initiators_by_dataset( + &self, + dataset_id: &DatasetID, + ) -> Result { + Ok(FlowInitiatorListing { + matched_stream: self + .flow_event_store + .get_unique_flow_initiator_ids_by_dataset(dataset_id), + }) + } + + /// Returns states of flows associated with a given account + /// ordered by creation time from newest to oldest + /// Applies specified filters + #[tracing::instrument(level = "debug", skip_all, fields(%account_id, ?filters, ?pagination))] + async fn list_all_flows_by_account( + &self, + account_id: &AccountID, + filters: AccountFlowFilters, + pagination: PaginationOpts, + ) -> Result { + let owned_dataset_ids = self + .dataset_ownership_service + .get_owned_datasets(account_id) + .await + .map_err(ListFlowsByDatasetError::Internal)?; + + let filtered_dataset_ids = if !filters.by_dataset_ids.is_empty() { + owned_dataset_ids + .into_iter() + .filter(|dataset_id| filters.by_dataset_ids.contains(dataset_id)) + .collect() + } else { + owned_dataset_ids + }; + + let mut total_count = 0; + let dataset_flow_filters = DatasetFlowFilters { + by_flow_status: filters.by_flow_status, + by_flow_type: filters.by_flow_type, + by_initiator: filters.by_initiator, + }; + + for dataset_id in &filtered_dataset_ids { + total_count += self + .flow_event_store + .get_count_flows_by_dataset(dataset_id, &dataset_flow_filters) + .await?; + } + + let account_dataset_ids: HashSet = HashSet::from_iter(filtered_dataset_ids); + + let matched_stream = Box::pin(async_stream::try_stream! { + let relevant_flow_ids: Vec<_> = self + .flow_event_store + .get_all_flow_ids_by_datasets(account_dataset_ids, &dataset_flow_filters, pagination) + .try_collect() + .await + .int_err()?; + + // TODO: implement batch loading + for flow_id in relevant_flow_ids { + let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; + yield flow.into(); + } + }); + + Ok(FlowStateListing { + matched_stream, + total_count, + }) + } + + /// Returns datasets with flows associated with a given account + /// ordered by creation time from newest to oldest. + #[tracing::instrument(level = "debug", skip_all, fields(%account_id))] + async fn list_all_datasets_with_flow_by_account( + &self, + account_id: &AccountID, + ) -> Result { + let owned_dataset_ids = self + .dataset_ownership_service + .get_owned_datasets(account_id) + .await + .map_err(ListFlowsByDatasetError::Internal)?; + + let matched_stream = Box::pin(async_stream::try_stream! 
{ + for dataset_id in &owned_dataset_ids { + let dataset_flows_count = self + .flow_event_store + .get_count_flows_by_dataset(dataset_id, &Default::default()) + .await?; + + if dataset_flows_count > 0 { + yield dataset_id.clone(); + } + } + }); + + Ok(FlowDatasetListing { matched_stream }) + } + + /// Returns states of system flows + /// ordered by creation time from newest to oldest + /// Applies specified filters + #[tracing::instrument(level = "debug", skip_all, fields(?filters, ?pagination))] + async fn list_all_system_flows( + &self, + filters: SystemFlowFilters, + pagination: PaginationOpts, + ) -> Result { + let total_count = self + .flow_event_store + .get_count_system_flows(&filters) + .await + .int_err()?; + + let matched_stream = Box::pin(async_stream::try_stream! { + let relevant_flow_ids: Vec<_> = self + .flow_event_store + .get_all_system_flow_ids(&filters, pagination) + .try_collect() + .await?; + + // TODO: implement batch loading + for flow_id in relevant_flow_ids { + let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; + yield flow.into(); + } + }); + + Ok(FlowStateListing { + matched_stream, + total_count, + }) + } + + /// Returns state of all flows, whether they are system-level or + /// dataset-bound, ordered by creation time from newest to oldest + #[tracing::instrument(level = "debug", skip_all, fields(?pagination))] + async fn list_all_flows( + &self, + pagination: PaginationOpts, + ) -> Result { + let empty_filters = AllFlowFilters::default(); + let total_count = self + .flow_event_store + .get_count_all_flows(&empty_filters) + .await?; + + let matched_stream = Box::pin(async_stream::try_stream! { + let all_flows: Vec<_> = self + .flow_event_store + .get_all_flow_ids(&empty_filters, pagination) + .try_collect() + .await?; + + // TODO: implement batch loading + for flow_id in all_flows { + let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; + yield flow.into(); + } + }); + + Ok(FlowStateListing { + matched_stream, + total_count, + }) + } + + /// Returns current state of a given flow + #[tracing::instrument(level = "debug", skip_all, fields(%flow_id))] + async fn get_flow(&self, flow_id: FlowID) -> Result { + let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await?; + Ok(flow.into()) + } + + /// Triggers the specified flow manually, unless it's already waiting + #[tracing::instrument( + level = "debug", + skip_all, + fields(?flow_key, %initiator_account_id) + )] + async fn trigger_manual_flow( + &self, + trigger_time: DateTime, + flow_key: FlowKey, + initiator_account_id: AccountID, + config_snapshot_maybe: Option, + ) -> Result { + let activation_time = self.executor_config.round_time(trigger_time)?; + + let enqueue_helper = self.catalog.get_one::().unwrap(); + enqueue_helper + .trigger_flow_common( + &flow_key, + FlowTrigger::Manual(FlowTriggerManual { + trigger_time: activation_time, + initiator_account_id, + }), + FlowTriggerContext::Unconditional, + config_snapshot_maybe, + ) + .await + .map_err(RequestFlowError::Internal) + } + + /// Attempts to cancel the tasks already scheduled for the given flow + #[tracing::instrument( + level = "debug", + skip_all, + fields(%flow_id) + )] + async fn cancel_scheduled_tasks( + &self, + flow_id: FlowID, + ) -> Result { + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await?; + + // Cancel tasks for flows in Waiting/Running state. 
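For readers skimming this new file: each listing method above follows the same shape, i.e. count the matching flows up front, stream the matching IDs page by page, and lazily load each aggregate before yielding it. A minimal standalone sketch of that shape (hypothetical types and a fake store; assumes only the async-stream, futures and tokio crates):

use futures::{Stream, TryStreamExt};

#[derive(Debug)]
struct FlowState {
    id: u64,
}

// Stand-in for the event store: a page of matching IDs plus a loader per aggregate.
fn matching_ids(offset: usize, limit: usize) -> Vec<u64> {
    (0u64..100).skip(offset).take(limit).collect()
}

async fn load_flow(id: u64) -> Result<FlowState, std::io::Error> {
    Ok(FlowState { id })
}

// Same pattern as the methods above: yield fully loaded states lazily, one page at a time.
fn list_flows(offset: usize, limit: usize) -> impl Stream<Item = Result<FlowState, std::io::Error>> {
    async_stream::try_stream! {
        for id in matching_ids(offset, limit) {
            let flow = load_flow(id).await?;
            yield flow;
        }
    }
}

#[tokio::main]
async fn main() -> Result<(), std::io::Error> {
    // Box::pin mirrors the code above: it turns the anonymous stream into a pinned,
    // type-erasable value that can be handed out as part of a listing struct.
    let page: Vec<_> = Box::pin(list_flows(0, 10)).try_collect().await?;
    println!("loaded {} flows: {page:?}", page.len());
    Ok(())
}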
+ // Ignore in Finished state + match flow.status() { + FlowStatus::Waiting | FlowStatus::Running => { + // Abort current flow and it's scheduled tasks + let abort_helper = self.catalog.get_one::().unwrap(); + abort_helper.abort_loaded_flow(&mut flow).await?; + } + FlowStatus::Finished => { /* Skip, idempotence */ } + } + + Ok(flow.into()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_service_impl.rs b/src/domain/flow-system/services/src/flow/flow_service_impl.rs deleted file mode 100644 index f8a8c4f0d..000000000 --- a/src/domain/flow-system/services/src/flow/flow_service_impl.rs +++ /dev/null @@ -1,1602 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -use std::collections::HashSet; -use std::sync::{Arc, Mutex}; - -use chrono::{DateTime, DurationRound, Utc}; -use database_common::DatabaseTransactionRunner; -use dill::*; -use futures::TryStreamExt; -use internal_error::InternalError; -use kamu_core::{ - DatasetChangesService, - DatasetLifecycleMessage, - DatasetOwnershipService, - DependencyGraphService, - MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, -}; -use kamu_flow_system::*; -use kamu_task_system::*; -use messaging_outbox::{ - MessageConsumer, - MessageConsumerMeta, - MessageConsumerT, - MessageConsumptionDurability, - Outbox, - OutboxExt, -}; -use opendatafabric::{AccountID, DatasetID}; -use time_source::SystemTimeSource; -use tokio_stream::StreamExt; - -use super::active_configs_state::ActiveConfigsState; -use super::flow_time_wheel::FlowTimeWheel; -use super::pending_flows_state::PendingFlowsState; -use crate::{ - MESSAGE_CONSUMER_KAMU_FLOW_SERVICE, - MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, -}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct FlowServiceImpl { - catalog: Catalog, - state: Arc>, - run_config: Arc, - flow_event_store: Arc, - time_source: Arc, - task_scheduler: Arc, - dataset_changes_service: Arc, - dependency_graph_service: Arc, - dataset_ownership_service: Arc, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Default)] -struct State { - active_configs: ActiveConfigsState, - pending_flows: PendingFlowsState, - time_wheel: FlowTimeWheel, - running: bool, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[component(pub)] -#[interface(dyn FlowService)] -#[interface(dyn FlowServiceTestDriver)] -#[interface(dyn MessageConsumer)] -#[interface(dyn MessageConsumerT)] -#[interface(dyn MessageConsumerT)] -#[interface(dyn MessageConsumerT)] -#[meta(MessageConsumerMeta { - consumer_name: MESSAGE_CONSUMER_KAMU_FLOW_SERVICE, - feeding_producers: &[ - MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, - MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE - ], - durability: 
MessageConsumptionDurability::Durable, -})] -#[scope(Singleton)] -impl FlowServiceImpl { - pub fn new( - catalog: Catalog, - run_config: Arc, - flow_event_store: Arc, - time_source: Arc, - task_scheduler: Arc, - dataset_changes_service: Arc, - dependency_graph_service: Arc, - dataset_ownership_service: Arc, - ) -> Self { - Self { - catalog, - state: Arc::new(Mutex::new(State::default())), - run_config, - flow_event_store, - time_source, - task_scheduler, - dataset_changes_service, - dependency_graph_service, - dataset_ownership_service, - } - } - - fn round_time(&self, time: DateTime) -> Result, InternalError> { - let rounded_time = time - .duration_round(self.run_config.awaiting_step) - .int_err()?; - Ok(rounded_time) - } - - #[tracing::instrument(level = "debug", skip_all)] - async fn run_current_timeslot( - &self, - timeslot_time: DateTime, - ) -> Result<(), InternalError> { - let planned_flow_ids: Vec<_> = { - let mut state = self.state.lock().unwrap(); - state.time_wheel.take_nearest_planned_flows() - }; - - let mut planned_task_futures = Vec::new(); - for planned_flow_id in planned_flow_ids { - planned_task_futures.push(async move { - let mut flow = Flow::load(planned_flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - self.schedule_flow_task(&mut flow, timeslot_time).await?; - Ok(()) - }); - } - - let results = futures::future::join_all(planned_task_futures).await; - results - .into_iter() - .filter(Result::is_err) - .map(|e| e.err().unwrap()) - .for_each(|e: InternalError| { - tracing::error!(error=?e, "Scheduling flow failed"); - }); - - Ok(()) - } - - #[tracing::instrument(level = "debug", skip_all)] - async fn initialize_auto_polling_flows_from_configurations( - &self, - flow_configuration_service: &dyn FlowConfigurationService, - start_time: DateTime, - ) -> Result<(), InternalError> { - // Query all enabled flow configurations - let enabled_configurations: Vec<_> = flow_configuration_service - .list_enabled_configurations() - .try_collect() - .await?; - - // Split configs by those which have a schedule or different rules - let (schedule_configs, non_schedule_configs): (Vec<_>, Vec<_>) = enabled_configurations - .into_iter() - .partition(|config| matches!(config.rule, FlowConfigurationRule::Schedule(_))); - - // Activate all configs, ensuring schedule configs precedes non-schedule configs - // (this i.e. forces all root datasets to be updated earlier than the derived) - // - // Thought: maybe we need topological sorting by derived relations as well to - // optimize the initial execution order, but batching rules may work just fine - for enabled_config in schedule_configs - .into_iter() - .chain(non_schedule_configs.into_iter()) - { - self.activate_flow_configuration( - start_time, - enabled_config.flow_key, - enabled_config.rule, - ) - .await?; - } - - Ok(()) - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?rule))] - async fn activate_flow_configuration( - &self, - start_time: DateTime, - flow_key: FlowKey, - rule: FlowConfigurationRule, - ) -> Result<(), InternalError> { - match &flow_key { - FlowKey::Dataset(dataset_flow_key) => { - self.state - .lock() - .unwrap() - .active_configs - .add_dataset_flow_config(dataset_flow_key, rule.clone()); - - match &rule { - FlowConfigurationRule::TransformRule(_) => { - self.enqueue_auto_polling_flow_unconditionally(start_time, &flow_key) - .await?; - } - // Such as compaction and reset is very dangerous operation we - // skip running it during activation flow configurations. 
- // And schedule will be used only for system flows - FlowConfigurationRule::CompactionRule(_) - | FlowConfigurationRule::Schedule(_) - | FlowConfigurationRule::ResetRule(_) => (), - FlowConfigurationRule::IngestRule(ingest_rule) => { - self.enqueue_scheduled_auto_polling_flow( - start_time, - &flow_key, - &ingest_rule.schedule_condition, - ) - .await?; - } - } - } - FlowKey::System(system_flow_key) => { - if let FlowConfigurationRule::Schedule(schedule) = &rule { - self.state - .lock() - .unwrap() - .active_configs - .add_system_flow_config(system_flow_key.flow_type, schedule.clone()); - - self.enqueue_scheduled_auto_polling_flow(start_time, &flow_key, schedule) - .await?; - } else { - unimplemented!( - "Doubt will ever need to schedule system flows via batching rules" - ) - } - } - } - - Ok(()) - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow_key))] - async fn try_enqueue_scheduled_auto_polling_flow_if_enabled( - &self, - start_time: DateTime, - flow_key: &FlowKey, - ) -> Result<(), InternalError> { - let maybe_active_schedule = self - .state - .lock() - .unwrap() - .active_configs - .try_get_flow_schedule(flow_key); - - if let Some(active_schedule) = maybe_active_schedule { - self.enqueue_scheduled_auto_polling_flow(start_time, flow_key, &active_schedule) - .await?; - } - - Ok(()) - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?schedule))] - async fn enqueue_scheduled_auto_polling_flow( - &self, - start_time: DateTime, - flow_key: &FlowKey, - schedule: &Schedule, - ) -> Result { - self.trigger_flow_common( - flow_key, - FlowTrigger::AutoPolling(FlowTriggerAutoPolling { - trigger_time: start_time, - }), - FlowTriggerContext::Scheduled(schedule.clone()), - None, - ) - .await - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow_key))] - async fn enqueue_auto_polling_flow_unconditionally( - &self, - start_time: DateTime, - flow_key: &FlowKey, - ) -> Result { - // Very similar to manual trigger, but automatic reasons - self.trigger_flow_common( - flow_key, - FlowTrigger::AutoPolling(FlowTriggerAutoPolling { - trigger_time: start_time, - }), - FlowTriggerContext::Unconditional, - None, - ) - .await - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow.flow_key, %flow.flow_id, ))] - async fn enqueue_dependent_flows( - &self, - input_success_time: DateTime, - flow: &Flow, - flow_result: &FlowResult, - ) -> Result<(), InternalError> { - if let FlowKey::Dataset(fk_dataset) = &flow.flow_key { - let dependent_dataset_flow_plans = self - .make_downstream_dependencies_flow_plans(fk_dataset, flow.config_snapshot.as_ref()) - .await?; - if dependent_dataset_flow_plans.is_empty() { - return Ok(()); - } - let trigger = FlowTrigger::InputDatasetFlow(FlowTriggerInputDatasetFlow { - trigger_time: input_success_time, - dataset_id: fk_dataset.dataset_id.clone(), - flow_type: fk_dataset.flow_type, - flow_id: flow.flow_id, - flow_result: flow_result.clone(), - }); - // For each, trigger needed flow - for dependent_dataset_flow_plan in dependent_dataset_flow_plans { - self.trigger_flow_common( - &dependent_dataset_flow_plan.flow_key, - trigger.clone(), - dependent_dataset_flow_plan.flow_trigger_context, - dependent_dataset_flow_plan.maybe_config_snapshot, - ) - .await?; - } - - Ok(()) - } else { - unreachable!("Not expecting other types of flow keys than dataset"); - } - } - - async fn trigger_flow_common( - &self, - flow_key: &FlowKey, - trigger: FlowTrigger, - context: FlowTriggerContext, - config_snapshot_maybe: Option, - ) -> 
Result { - // Query previous runs stats to determine activation time - let flow_run_stats = self.flow_run_stats(flow_key).await?; - - // Flows may not be attempted more frequent than mandatory throttling period. - // If flow has never run before, let it go without restriction. - let trigger_time = trigger.trigger_time(); - let mut throttling_boundary_time = - flow_run_stats.last_attempt_time.map_or(trigger_time, |t| { - t + self.run_config.mandatory_throttling_period - }); - // It's also possible we are waiting for some start condition much longer.. - if throttling_boundary_time < trigger_time { - throttling_boundary_time = trigger_time; - } - - // Is a pending flow present for this config? - match self.find_pending_flow(flow_key) { - // Already pending flow - Some(flow_id) => { - // Load, merge triggers, update activation time - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - - // Only merge unique triggers, ignore identical - flow.add_trigger_if_unique(self.time_source.now(), trigger) - .int_err()?; - - match context { - FlowTriggerContext::Batching(transform_rule) => { - // Is this rule still waited? - if matches!(flow.start_condition, Some(FlowStartCondition::Batching(_))) { - self.evaluate_flow_transform_rule( - trigger_time, - &mut flow, - &transform_rule, - throttling_boundary_time, - ) - .await?; - } else { - // Skip, the flow waits for something else - } - } - FlowTriggerContext::Scheduled(_) | FlowTriggerContext::Unconditional => { - // Evaluate throttling condition: is new time earlier than planned? - let planned_time = self - .find_planned_flow_activation_time(flow.flow_id) - .expect("Flow expected to have activation time by now"); - - if throttling_boundary_time < planned_time { - // If so, enqueue the flow earlier - self.enqueue_flow(flow.flow_id, throttling_boundary_time)?; - - // Indicate throttling, if applied - if throttling_boundary_time > trigger_time { - self.indicate_throttling_activity( - &mut flow, - throttling_boundary_time, - trigger_time, - )?; - } - } - } - } - - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - Ok(flow.into()) - } - - // Otherwise, initiate a new flow, and enqueue it in the time wheel - None => { - // Initiate new flow - let config_snapshot_maybe = if config_snapshot_maybe.is_some() { - config_snapshot_maybe - } else { - self.state - .lock() - .unwrap() - .active_configs - .try_get_config_snapshot_by_key(flow_key) - }; - let mut flow = self - .make_new_flow(flow_key.clone(), trigger, config_snapshot_maybe) - .await?; - - match context { - FlowTriggerContext::Batching(transform_rule) => { - // Don't activate if batching condition not satisfied - self.evaluate_flow_transform_rule( - trigger_time, - &mut flow, - &transform_rule, - throttling_boundary_time, - ) - .await?; - } - FlowTriggerContext::Scheduled(schedule) => { - // Next activation time depends on: - // - last success time, if ever launched - // - schedule, if defined - let naive_next_activation_time = schedule - .next_activation_time(trigger_time, flow_run_stats.last_success_time); - - // Apply throttling boundary - let next_activation_time = - std::cmp::max(throttling_boundary_time, naive_next_activation_time); - self.enqueue_flow(flow.flow_id, next_activation_time)?; - - // Set throttling activity as start condition - if throttling_boundary_time > naive_next_activation_time { - self.indicate_throttling_activity( - &mut flow, - throttling_boundary_time, - naive_next_activation_time, - )?; - } else if naive_next_activation_time > 
trigger_time { - // Set waiting according to the schedule - flow.set_relevant_start_condition( - self.time_source.now(), - FlowStartCondition::Schedule(FlowStartConditionSchedule { - wake_up_at: naive_next_activation_time, - }), - ) - .int_err()?; - } - } - FlowTriggerContext::Unconditional => { - // Apply throttling boundary - let next_activation_time = - std::cmp::max(throttling_boundary_time, trigger_time); - self.enqueue_flow(flow.flow_id, next_activation_time)?; - - // Set throttling activity as start condition - if throttling_boundary_time > trigger_time { - self.indicate_throttling_activity( - &mut flow, - throttling_boundary_time, - trigger_time, - )?; - } - } - } - - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - Ok(flow.into()) - } - } - } - - async fn evaluate_flow_transform_rule( - &self, - evaluation_time: DateTime, - flow: &mut Flow, - transform_rule: &TransformRule, - throttling_boundary_time: DateTime, - ) -> Result<(), InternalError> { - assert!(matches!( - flow.flow_key.get_type(), - AnyFlowType::Dataset( - DatasetFlowType::ExecuteTransform | DatasetFlowType::HardCompaction - ) - )); - - // TODO: it's likely assumed the accumulation is per each input separately, but - // for now count overall number - let mut accumulated_records_count = 0; - let mut watermark_modified = false; - let mut is_compacted = false; - - // Scan each accumulated trigger to decide - for trigger in &flow.triggers { - if let FlowTrigger::InputDatasetFlow(trigger) = trigger { - match &trigger.flow_result { - FlowResult::Empty | FlowResult::DatasetReset(_) => {} - FlowResult::DatasetCompact(_) => { - is_compacted = true; - } - FlowResult::DatasetUpdate(update) => { - // Compute increment since the first trigger by this dataset. - // Note: there might have been multiple updates since that time. - // We are only recording the first trigger of particular dataset. - if let FlowResultDatasetUpdate::Changed(update_result) = update { - let increment = self - .dataset_changes_service - .get_increment_since( - &trigger.dataset_id, - update_result.old_head.as_ref(), - ) - .await - .int_err()?; - - accumulated_records_count += increment.num_records; - watermark_modified |= increment.updated_watermark.is_some(); - } - } - } - } - } - - // The timeout for batching will happen at: - let batching_deadline = - flow.primary_trigger().trigger_time() + *transform_rule.max_batching_interval(); - - // Accumulated something if at least some input changed or watermark was touched - let accumulated_something = accumulated_records_count > 0 || watermark_modified; - - // The condition is satisfied if - // - we crossed the number of new records thresholds - // - or waited long enough, assuming - // - there is at least some change of the inputs - // - watermark got touched - let satisfied = accumulated_something - && (accumulated_records_count >= transform_rule.min_records_to_await() - || evaluation_time >= batching_deadline); - - // Set batching condition data, but only during the first rule evaluation. 
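The batching evaluation above reduces to a threshold-or-deadline decision: the flow may start once anything at all has accumulated and either enough records arrived or the deadline derived from the first trigger has passed. Restated as a standalone function (hypothetical rule struct, not the crate's TransformRule API; chrono only):

use chrono::{DateTime, Duration, Utc};

struct BatchingRule {
    min_records_to_await: u64,
    max_batching_interval: Duration,
}

// Returns (deadline, satisfied): the flow never waits past `deadline` once something
// accumulated, and starts immediately once the record threshold is crossed.
fn evaluate_batching(
    rule: &BatchingRule,
    first_trigger_time: DateTime<Utc>,
    evaluation_time: DateTime<Utc>,
    accumulated_records: u64,
    watermark_modified: bool,
) -> (DateTime<Utc>, bool) {
    let deadline = first_trigger_time + rule.max_batching_interval;
    let accumulated_something = accumulated_records > 0 || watermark_modified;
    let satisfied = accumulated_something
        && (accumulated_records >= rule.min_records_to_await || evaluation_time >= deadline);
    (deadline, satisfied)
}

fn main() {
    let rule = BatchingRule {
        min_records_to_await: 100,
        max_batching_interval: Duration::minutes(30),
    };
    let t0 = Utc::now();
    // Too few records and the deadline has not passed yet: keep waiting.
    assert!(!evaluate_batching(&rule, t0, t0 + Duration::minutes(5), 10, false).1);
    // Past the deadline with at least something accumulated: start the flow.
    assert!(evaluate_batching(&rule, t0, t0 + Duration::minutes(31), 10, false).1);
}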
- if !matches!( - flow.start_condition.as_ref(), - Some(FlowStartCondition::Batching(_)) - ) { - flow.set_relevant_start_condition( - self.time_source.now(), - FlowStartCondition::Batching(FlowStartConditionBatching { - active_transform_rule: *transform_rule, - batching_deadline, - }), - ) - .int_err()?; - } - - // If we accumulated at least something (records or watermarks), - // the upper bound of potential finish time for batching is known - if accumulated_something || is_compacted { - // Finish immediately if satisfied, or not later than the deadline - let batching_finish_time = if satisfied || is_compacted { - evaluation_time - } else { - batching_deadline - }; - - // Throttling boundary correction - let corrected_finish_time = - std::cmp::max(batching_finish_time, throttling_boundary_time); - - let should_activate = match self.find_planned_flow_activation_time(flow.flow_id) { - Some(activation_time) => activation_time > corrected_finish_time, - None => true, - }; - if should_activate { - self.enqueue_flow(flow.flow_id, corrected_finish_time)?; - } - - // If batching is over, it's start condition is no longer valid. - // However, set throttling condition, if it applies - if (satisfied || is_compacted) && throttling_boundary_time > batching_finish_time { - self.indicate_throttling_activity( - flow, - throttling_boundary_time, - batching_finish_time, - )?; - } - } - - Ok(()) - } - - fn indicate_throttling_activity( - &self, - flow: &mut Flow, - wake_up_at: DateTime, - shifted_from: DateTime, - ) -> Result<(), InternalError> { - flow.set_relevant_start_condition( - self.time_source.now(), - FlowStartCondition::Throttling(FlowStartConditionThrottling { - interval: self.run_config.mandatory_throttling_period, - wake_up_at, - shifted_from, - }), - ) - .int_err()?; - Ok(()) - } - - fn find_pending_flow(&self, flow_key: &FlowKey) -> Option { - let state = self.state.lock().unwrap(); - state.pending_flows.try_get_pending_flow(flow_key) - } - - fn find_planned_flow_activation_time(&self, flow_id: FlowID) -> Option> { - self.state - .lock() - .unwrap() - .time_wheel - .get_planned_flow_activation_time(flow_id) - } - - #[tracing::instrument(level = "trace", skip_all, fields(?flow_key, ?trigger))] - async fn make_new_flow( - &self, - flow_key: FlowKey, - trigger: FlowTrigger, - config_snapshot: Option, - ) -> Result { - let flow = Flow::new( - self.time_source.now(), - self.flow_event_store.new_flow_id(), - flow_key, - trigger, - config_snapshot, - ); - - let mut state = self.state.lock().unwrap(); - state - .pending_flows - .add_pending_flow(flow.flow_key.clone(), flow.flow_id); - - Ok(flow) - } - - async fn flow_run_stats(&self, flow_key: &FlowKey) -> Result { - match flow_key { - FlowKey::Dataset(fk_dataset) => { - self.flow_event_store - .get_dataset_flow_run_stats(&fk_dataset.dataset_id, fk_dataset.flow_type) - .await - } - FlowKey::System(fk_system) => { - self.flow_event_store - .get_system_flow_run_stats(fk_system.flow_type) - .await - } - } - } - - #[tracing::instrument(level = "trace", skip_all, fields(%flow_id, %activation_time))] - fn enqueue_flow( - &self, - flow_id: FlowID, - activation_time: DateTime, - ) -> Result<(), InternalError> { - self.state - .lock() - .unwrap() - .time_wheel - .activate_at(activation_time, flow_id); - Ok(()) - } - - #[tracing::instrument(level = "trace", skip_all, fields(flow_id = %flow.flow_id))] - async fn schedule_flow_task( - &self, - flow: &mut Flow, - schedule_time: DateTime, - ) -> Result { - let logical_plan = - 
self.make_task_logical_plan(&flow.flow_key, flow.config_snapshot.as_ref())?; - - let task = self - .task_scheduler - .create_task(logical_plan) - .await - .int_err()?; - - flow.set_relevant_start_condition( - schedule_time, - FlowStartCondition::Executor(FlowStartConditionExecutor { - task_id: task.task_id, - }), - ) - .int_err()?; - - flow.on_task_scheduled(schedule_time, task.task_id) - .int_err()?; - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - - let mut state = self.state.lock().unwrap(); - state - .pending_flows - .track_flow_task(flow.flow_id, task.task_id); - - Ok(task.task_id) - } - - async fn abort_flow(&self, flow_id: FlowID) -> Result<(), InternalError> { - // Mark flow as aborted - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - - self.abort_flow_impl(&mut flow).await - } - - async fn abort_flow_impl(&self, flow: &mut Flow) -> Result<(), InternalError> { - // Abort flow itself - flow.abort(self.time_source.now()).int_err()?; - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - - // Cancel associated tasks, but first drop task -> flow associations - { - let mut state = self.state.lock().unwrap(); - for task_id in &flow.task_ids { - state.pending_flows.untrack_flow_by_task(*task_id); - } - } - for task_id in &flow.task_ids { - self.task_scheduler.cancel_task(*task_id).await.int_err()?; - } - - Ok(()) - } - - /// Creates task logical plan that corresponds to template - pub fn make_task_logical_plan( - &self, - flow_key: &FlowKey, - maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, - ) -> Result { - match flow_key { - FlowKey::Dataset(flow_key) => match flow_key.flow_type { - DatasetFlowType::Ingest | DatasetFlowType::ExecuteTransform => { - let mut fetch_uncacheable = false; - if let Some(config_snapshot) = maybe_config_snapshot - && let FlowConfigurationSnapshot::Ingest(ingest_rule) = config_snapshot - { - fetch_uncacheable = ingest_rule.fetch_uncacheable; - } - Ok(LogicalPlan::UpdateDataset(UpdateDataset { - dataset_id: flow_key.dataset_id.clone(), - fetch_uncacheable, - })) - } - DatasetFlowType::HardCompaction => { - let mut max_slice_size: Option = None; - let mut max_slice_records: Option = None; - let mut keep_metadata_only = false; - - if let Some(config_snapshot) = maybe_config_snapshot - && let FlowConfigurationSnapshot::Compaction(compaction_rule) = - config_snapshot - { - max_slice_size = compaction_rule.max_slice_size(); - max_slice_records = compaction_rule.max_slice_records(); - keep_metadata_only = - matches!(compaction_rule, CompactionRule::MetadataOnly(_)); - }; - - Ok(LogicalPlan::HardCompactionDataset(HardCompactionDataset { - dataset_id: flow_key.dataset_id.clone(), - max_slice_size, - max_slice_records, - keep_metadata_only, - })) - } - DatasetFlowType::Reset => { - if let Some(config_rule) = maybe_config_snapshot - && let FlowConfigurationSnapshot::Reset(reset_rule) = config_rule - { - return Ok(LogicalPlan::Reset(ResetDataset { - dataset_id: flow_key.dataset_id.clone(), - new_head_hash: reset_rule.new_head_hash.clone(), - old_head_hash: reset_rule.old_head_hash.clone(), - recursive: reset_rule.recursive, - })); - } - InternalError::bail("Reset flow cannot be called without configuration") - } - }, - FlowKey::System(flow_key) => { - match flow_key.flow_type { - // TODO: replace on correct logical plan - SystemFlowType::GC => Ok(LogicalPlan::Probe(Probe { - dataset_id: None, - busy_time: Some(std::time::Duration::from_secs(20)), - end_with_outcome: 
Some(TaskOutcome::Success(TaskResult::Empty)), - })), - } - } - } - } - - async fn make_downstream_dependencies_flow_plans( - &self, - fk_dataset: &FlowKeyDataset, - maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, - ) -> Result, InternalError> { - // ToDo: extend dependency graph with possibility to fetch downstream - // dependencies by owner - let dependent_dataset_ids: Vec<_> = self - .dependency_graph_service - .get_downstream_dependencies(&fk_dataset.dataset_id) - .await - .int_err()? - .collect() - .await; - - let mut plans: Vec = vec![]; - if dependent_dataset_ids.is_empty() { - return Ok(plans); - } - - match self.classify_dependent_trigger_type(fk_dataset.flow_type, maybe_config_snapshot) { - DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform => { - let guard = self.state.lock().unwrap(); - for dataset_id in dependent_dataset_ids { - if let Some(transform_rule) = - guard.active_configs.try_get_dataset_transform_rule( - &dataset_id, - DatasetFlowType::ExecuteTransform, - ) - { - plans.push(DownstreamDependencyFlowPlan { - flow_key: FlowKeyDataset::new( - dataset_id, - DatasetFlowType::ExecuteTransform, - ) - .into(), - flow_trigger_context: FlowTriggerContext::Batching(transform_rule), - maybe_config_snapshot: None, - }); - }; - } - } - - DownstreamDependencyTriggerType::TriggerOwnHardCompaction => { - let dataset_owner_account_ids = self - .dataset_ownership_service - .get_dataset_owners(&fk_dataset.dataset_id) - .await?; - - for dependent_dataset_id in dependent_dataset_ids { - for owner_account_id in &dataset_owner_account_ids { - if self - .dataset_ownership_service - .is_dataset_owned_by(&dependent_dataset_id, owner_account_id) - .await? - { - plans.push(DownstreamDependencyFlowPlan { - flow_key: FlowKeyDataset::new( - dependent_dataset_id.clone(), - DatasetFlowType::HardCompaction, - ) - .into(), - flow_trigger_context: FlowTriggerContext::Unconditional, - // Currently we trigger Hard compaction recursively only in keep - // metadata only mode - maybe_config_snapshot: Some(FlowConfigurationSnapshot::Compaction( - CompactionRule::MetadataOnly(CompactionRuleMetadataOnly { - recursive: true, - }), - )), - }); - break; - } - } - } - } - - DownstreamDependencyTriggerType::Empty => {} - } - - Ok(plans) - } - - fn classify_dependent_trigger_type( - &self, - dataset_flow_type: DatasetFlowType, - maybe_config_snapshot: Option<&FlowConfigurationSnapshot>, - ) -> DownstreamDependencyTriggerType { - match dataset_flow_type { - DatasetFlowType::Ingest | DatasetFlowType::ExecuteTransform => { - DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform - } - DatasetFlowType::HardCompaction => { - if let Some(config_snapshot) = &maybe_config_snapshot - && let FlowConfigurationSnapshot::Compaction(compaction_rule) = config_snapshot - { - if compaction_rule.recursive() { - DownstreamDependencyTriggerType::TriggerOwnHardCompaction - } else { - DownstreamDependencyTriggerType::Empty - } - } else { - DownstreamDependencyTriggerType::TriggerAllEnabledExecuteTransform - } - } - DatasetFlowType::Reset => { - if let Some(config_snapshot) = &maybe_config_snapshot - && let FlowConfigurationSnapshot::Reset(reset_rule) = config_snapshot - && reset_rule.recursive - { - DownstreamDependencyTriggerType::TriggerOwnHardCompaction - } else { - DownstreamDependencyTriggerType::Empty - } - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl FlowService for 
FlowServiceImpl { - /// Runs the update main loop - #[tracing::instrument(level = "info", skip_all)] - async fn run(&self, planned_start_time: DateTime) -> Result<(), InternalError> { - // Mark running started - self.state.lock().unwrap().running = true; - - // Initial scheduling - DatabaseTransactionRunner::new(self.catalog.clone()) - .transactional_with2( - |flow_configuration_service: Arc, - outbox: Arc| async move { - let start_time = self.round_time(planned_start_time)?; - self.initialize_auto_polling_flows_from_configurations( - flow_configuration_service.as_ref(), - start_time, - ) - .await?; - - // Publish progress event - outbox - .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, - FlowServiceUpdatedMessage { - update_time: start_time, - update_details: FlowServiceUpdateDetails::Loaded, - }, - ) - .await?; - - Ok(()) - }, - ) - .await?; - - // Main scanning loop - let main_loop_span = tracing::debug_span!("FlowService main loop"); - let _ = main_loop_span.enter(); - - loop { - let current_time = self.time_source.now(); - - // Do we have a timeslot scheduled? - let maybe_nearest_activation_time = { - let state = self.state.lock().unwrap(); - state.time_wheel.nearest_activation_moment() - }; - - // Is it time to execute it yet? - if let Some(nearest_activation_time) = maybe_nearest_activation_time - && nearest_activation_time <= current_time - { - // Run scheduling for current time slot. Should not throw any errors - self.run_current_timeslot(nearest_activation_time).await?; - - // Publish progress event - DatabaseTransactionRunner::new(self.catalog.clone()) - .transactional_with(|outbox: Arc| async move { - outbox - .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, - FlowServiceUpdatedMessage { - update_time: nearest_activation_time, - update_details: FlowServiceUpdateDetails::ExecutedTimeslot, - }, - ) - .await - }) - .await?; - } - - self.time_source.sleep(self.run_config.awaiting_step).await; - } - } - - /// Triggers the specified flow manually, unless it's already waiting - #[tracing::instrument( - level = "debug", - skip_all, - fields(?flow_key, %initiator_account_id) - )] - async fn trigger_manual_flow( - &self, - trigger_time: DateTime, - flow_key: FlowKey, - initiator_account_id: AccountID, - config_snapshot_maybe: Option, - ) -> Result { - let activation_time = self.round_time(trigger_time)?; - - self.trigger_flow_common( - &flow_key, - FlowTrigger::Manual(FlowTriggerManual { - trigger_time: activation_time, - initiator_account_id, - }), - FlowTriggerContext::Unconditional, - config_snapshot_maybe, - ) - .await - .map_err(RequestFlowError::Internal) - } - - /// Returns states of flows associated with a given dataset - /// ordered by creation time from newest to oldest - /// Applies specified filters - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id, ?filters, ?pagination))] - async fn list_all_flows_by_dataset( - &self, - dataset_id: &DatasetID, - filters: DatasetFlowFilters, - pagination: FlowPaginationOpts, - ) -> Result { - let total_count = self - .flow_event_store - .get_count_flows_by_dataset(dataset_id, &filters) - .await?; - - let dataset_id = dataset_id.clone(); - - let matched_stream = Box::pin(async_stream::try_stream! 
{ - let relevant_flow_ids: Vec<_> = self - .flow_event_store - .get_all_flow_ids_by_dataset(&dataset_id, filters, pagination) - .try_collect() - .await?; - - // TODO: implement batch loading - for flow_id in relevant_flow_ids { - let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; - yield flow.into(); - } - }); - - Ok(FlowStateListing { - matched_stream, - total_count, - }) - } - - /// Returns initiators of flows associated with a given dataset - /// ordered by creation time from newest to oldest - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id))] - async fn list_all_flow_initiators_by_dataset( - &self, - dataset_id: &DatasetID, - ) -> Result { - Ok(FlowInitiatorListing { - matched_stream: self - .flow_event_store - .get_unique_flow_initiator_ids_by_dataset(dataset_id), - }) - } - - /// Returns states of flows associated with a given account - /// ordered by creation time from newest to oldest - /// Applies specified filters - #[tracing::instrument(level = "debug", skip_all, fields(%account_id, ?filters, ?pagination))] - async fn list_all_flows_by_account( - &self, - account_id: &AccountID, - filters: AccountFlowFilters, - pagination: FlowPaginationOpts, - ) -> Result { - let owned_dataset_ids = self - .dataset_ownership_service - .get_owned_datasets(account_id) - .await - .map_err(ListFlowsByDatasetError::Internal)?; - - let filtered_dataset_ids = if !filters.by_dataset_ids.is_empty() { - owned_dataset_ids - .into_iter() - .filter(|dataset_id| filters.by_dataset_ids.contains(dataset_id)) - .collect() - } else { - owned_dataset_ids - }; - - let mut total_count = 0; - let dataset_flow_filters = DatasetFlowFilters { - by_flow_status: filters.by_flow_status, - by_flow_type: filters.by_flow_type, - by_initiator: filters.by_initiator, - }; - - for dataset_id in &filtered_dataset_ids { - total_count += self - .flow_event_store - .get_count_flows_by_dataset(dataset_id, &dataset_flow_filters) - .await?; - } - - let account_dataset_ids: HashSet = HashSet::from_iter(filtered_dataset_ids); - - let matched_stream = Box::pin(async_stream::try_stream! { - let relevant_flow_ids: Vec<_> = self - .flow_event_store - .get_all_flow_ids_by_datasets(account_dataset_ids, &dataset_flow_filters, pagination) - .try_collect() - .await - .int_err()?; - - // TODO: implement batch loading - for flow_id in relevant_flow_ids { - let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; - yield flow.into(); - } - }); - - Ok(FlowStateListing { - matched_stream, - total_count, - }) - } - - /// Returns datasets with flows associated with a given account - /// ordered by creation time from newest to oldest. - #[tracing::instrument(level = "debug", skip_all, fields(%account_id))] - async fn list_all_datasets_with_flow_by_account( - &self, - account_id: &AccountID, - ) -> Result { - let owned_dataset_ids = self - .dataset_ownership_service - .get_owned_datasets(account_id) - .await - .map_err(ListFlowsByDatasetError::Internal)?; - - let matched_stream = Box::pin(async_stream::try_stream! 
{ - for dataset_id in &owned_dataset_ids { - let dataset_flows_count = self - .flow_event_store - .get_count_flows_by_dataset(dataset_id, &Default::default()) - .await?; - - if dataset_flows_count > 0 { - yield dataset_id.clone(); - } - } - }); - - Ok(FlowDatasetListing { matched_stream }) - } - - /// Returns states of system flows - /// ordered by creation time from newest to oldest - /// Applies specified filters - #[tracing::instrument(level = "debug", skip_all, fields(?filters, ?pagination))] - async fn list_all_system_flows( - &self, - filters: SystemFlowFilters, - pagination: FlowPaginationOpts, - ) -> Result { - let total_count = self - .flow_event_store - .get_count_system_flows(&filters) - .await - .int_err()?; - - let matched_stream = Box::pin(async_stream::try_stream! { - let relevant_flow_ids: Vec<_> = self - .flow_event_store - .get_all_system_flow_ids(filters, pagination) - .try_collect() - .await?; - - // TODO: implement batch loading - for flow_id in relevant_flow_ids { - let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; - yield flow.into(); - } - }); - - Ok(FlowStateListing { - matched_stream, - total_count, - }) - } - - /// Returns state of all flows, whether they are system-level or - /// dataset-bound, ordered by creation time from newest to oldest - #[tracing::instrument(level = "debug", skip_all, fields(?pagination))] - async fn list_all_flows( - &self, - pagination: FlowPaginationOpts, - ) -> Result { - let total_count = self.flow_event_store.get_count_all_flows().await?; - - let matched_stream = Box::pin(async_stream::try_stream! { - let all_flows: Vec<_> = self - .flow_event_store - .get_all_flow_ids(pagination) - .try_collect() - .await?; - - // TODO: implement batch loading - for flow_id in all_flows { - let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await.int_err()?; - yield flow.into(); - } - }); - - Ok(FlowStateListing { - matched_stream, - total_count, - }) - } - - /// Returns current state of a given flow - #[tracing::instrument(level = "debug", skip_all, fields(%flow_id))] - async fn get_flow(&self, flow_id: FlowID) -> Result { - let flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await?; - Ok(flow.into()) - } - - /// Attempts to cancel the tasks already scheduled for the given flow - #[tracing::instrument( - level = "debug", - skip_all, - fields(%flow_id) - )] - async fn cancel_scheduled_tasks( - &self, - flow_id: FlowID, - ) -> Result { - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()).await?; - - // Cancel tasks for flows in Waiting/Running state. 
- // Ignore in Finished state - match flow.status() { - FlowStatus::Waiting | FlowStatus::Running => { - // Abort current flow and it's scheduled tasks - self.abort_flow_impl(&mut flow).await?; - } - FlowStatus::Finished => { /* Skip, idempotence */ } - } - - Ok(flow.into()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl FlowServiceTestDriver for FlowServiceImpl { - /// Pretends running started - fn mimic_running_started(&self) { - let mut state = self.state.lock().unwrap(); - state.running = true; - } - - /// Pretends it is time to schedule the given flow that was not waiting for - /// anything else - async fn mimic_flow_scheduled( - &self, - flow_id: FlowID, - schedule_time: DateTime, - ) -> Result { - { - let mut state = self.state.lock().unwrap(); - state.time_wheel.cancel_flow_activation(flow_id).int_err()?; - } - - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - let task_id = self.schedule_flow_task(&mut flow, schedule_time).await?; - Ok(task_id) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -impl MessageConsumer for FlowServiceImpl {} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl MessageConsumerT for FlowServiceImpl { - #[tracing::instrument(level = "debug", skip_all, fields(?message))] - async fn consume_message( - &self, - target_catalog: &Catalog, - message: &TaskProgressMessage, - ) -> Result<(), InternalError> { - match message { - TaskProgressMessage::Running(message) => { - // Is this a task associated with flows? - let maybe_flow_id = { - let state = self.state.lock().unwrap(); - if !state.running { - // Abort if running hasn't started yet - return Ok(()); - } - state.pending_flows.try_get_flow_id_by_task(message.task_id) - }; - - if let Some(flow_id) = maybe_flow_id { - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - flow.on_task_running(message.event_time, message.task_id) - .int_err()?; - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - - let outbox = target_catalog.get_one::().unwrap(); - outbox - .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, - FlowServiceUpdatedMessage { - update_time: message.event_time, - update_details: FlowServiceUpdateDetails::FlowRunning, - }, - ) - .await?; - } - } - TaskProgressMessage::Finished(message) => { - // Is this a task associated with flows? 
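The lookup that follows leans on the in-memory task-to-flow association that this change removes in favour of event-store queries (see the deleted pending_flows_state.rs further below). A rough sketch of that old mechanism, with made-up message and ID types:

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct TaskId(u64);

#[derive(Clone, Copy, Debug)]
struct FlowId(u64);

enum TaskProgress {
    Running(TaskId),
    Finished(TaskId, bool), // bool: success
}

#[derive(Default)]
struct PendingFlows {
    by_task: HashMap<TaskId, FlowId>,
}

impl PendingFlows {
    fn track(&mut self, task: TaskId, flow: FlowId) {
        self.by_task.insert(task, flow);
    }

    // Mirrors the consumer above: Running keeps the association, Finished drops it.
    fn on_progress(&mut self, msg: TaskProgress) -> Option<FlowId> {
        match msg {
            TaskProgress::Running(task) => self.by_task.get(&task).copied(),
            TaskProgress::Finished(task, _success) => self.by_task.remove(&task),
        }
    }
}

fn main() {
    let mut pending = PendingFlows::default();
    pending.track(TaskId(7), FlowId(42));
    assert!(matches!(pending.on_progress(TaskProgress::Running(TaskId(7))), Some(FlowId(42))));
    assert!(pending.on_progress(TaskProgress::Finished(TaskId(7), true)).is_some());
    assert!(pending.on_progress(TaskProgress::Running(TaskId(7))).is_none());
}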
- let maybe_flow_id = { - let state = self.state.lock().unwrap(); - if !state.running { - // Abort if running hasn't started yet - return Ok(()); - } - state.pending_flows.try_get_flow_id_by_task(message.task_id) - }; - - let finish_time = self.round_time(message.event_time)?; - - if let Some(flow_id) = maybe_flow_id { - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - flow.on_task_finished( - message.event_time, - message.task_id, - message.outcome.clone(), - ) - .int_err()?; - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - - { - let mut state = self.state.lock().unwrap(); - state.pending_flows.untrack_flow_by_task(message.task_id); - state.pending_flows.drop_pending_flow(&flow.flow_key); - } - - // In case of success: - // - execute followup method - if let Some(flow_result) = flow.try_result_as_ref() - && !flow_result.is_empty() - { - match flow.flow_key.get_type().success_followup_method() { - FlowSuccessFollowupMethod::Ignore => {} - FlowSuccessFollowupMethod::TriggerDependent => { - self.enqueue_dependent_flows(finish_time, &flow, flow_result) - .await?; - } - } - } - - // In case of success: - // - enqueue next auto-polling flow cycle - if message.outcome.is_success() { - self.try_enqueue_scheduled_auto_polling_flow_if_enabled( - finish_time, - &flow.flow_key, - ) - .await?; - } - - let outbox = target_catalog.get_one::().unwrap(); - outbox - .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, - FlowServiceUpdatedMessage { - update_time: message.event_time, - update_details: FlowServiceUpdateDetails::FlowFinished, - }, - ) - .await?; - - // TODO: retry logic in case of failed outcome - } - } - } - - Ok(()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl MessageConsumerT for FlowServiceImpl { - #[tracing::instrument(level = "debug", skip_all, fields(?message))] - async fn consume_message( - &self, - _: &Catalog, - message: &FlowConfigurationUpdatedMessage, - ) -> Result<(), InternalError> { - if message.paused { - let maybe_pending_flow_id = { - let mut state = self.state.lock().unwrap(); - if !state.running { - // Abort if running hasn't started yet - return Ok(()); - }; - - state.active_configs.drop_flow_config(&message.flow_key); - - let maybe_pending_flow_id = - state.pending_flows.drop_pending_flow(&message.flow_key); - if let Some(flow_id) = &maybe_pending_flow_id { - state - .time_wheel - .cancel_flow_activation(*flow_id) - .int_err()?; - } - maybe_pending_flow_id - }; - - if let Some(flow_id) = maybe_pending_flow_id { - self.abort_flow(flow_id).await?; - } - } else { - { - let state = self.state.lock().unwrap(); - if !state.running { - // Abort if running hasn't started yet - return Ok(()); - }; - } - - let activation_time = self.round_time(message.event_time)?; - self.activate_flow_configuration( - activation_time, - message.flow_key.clone(), - message.rule.clone(), - ) - .await?; - } - - Ok(()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl MessageConsumerT for FlowServiceImpl { - #[tracing::instrument(level = "debug", skip_all, fields(?message))] - async fn consume_message( - &self, - _: &Catalog, - message: &DatasetLifecycleMessage, - ) -> Result<(), InternalError> { - match message { - DatasetLifecycleMessage::Deleted(message) => { - let flow_ids_2_abort = { - let mut state = 
self.state.lock().unwrap(); - if !state.running { - // Abort if running hasn't started yet - return Ok(()); - }; - - state - .active_configs - .drop_dataset_configs(&message.dataset_id); - - // For every possible dataset flow: - // - drop it from pending state - // - drop queued activations - // - collect ID of aborted flow - let mut flow_ids_2_abort: Vec<_> = - Vec::with_capacity(DatasetFlowType::all().len()); - for flow_type in DatasetFlowType::all() { - if let Some(flow_id) = state - .pending_flows - .drop_dataset_pending_flow(&message.dataset_id, *flow_type) - { - flow_ids_2_abort.push(flow_id); - state.time_wheel.cancel_flow_activation(flow_id).int_err()?; - } - } - flow_ids_2_abort - }; - - // Abort matched flows - for flow_id in flow_ids_2_abort { - let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) - .await - .int_err()?; - flow.abort(self.time_source.now()).int_err()?; - flow.save(self.flow_event_store.as_ref()).await.int_err()?; - } - - // Not deleting task->update association, it should be safe. - // Most of the time the outcome of the task will be "Cancelled". - // Even if task squeezes to succeed in between cancellations, - // it's safe: - // - we will record a successful update, no consequence - // - no further updates will be attempted (schedule - // deactivated above) - // - no dependent tasks will be launched (dependency graph - // erases neighbors) - } - - DatasetLifecycleMessage::Created(_) - | DatasetLifecycleMessage::DependenciesUpdated(_) => { - // No action required - } - } - - Ok(()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Eq, PartialEq)] -pub enum FlowTriggerContext { - Unconditional, - Scheduled(Schedule), - Batching(TransformRule), -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Eq, PartialEq)] -pub struct DownstreamDependencyFlowPlan { - pub flow_key: FlowKey, - pub flow_trigger_context: FlowTriggerContext, - pub maybe_config_snapshot: Option, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -enum DownstreamDependencyTriggerType { - TriggerAllEnabledExecuteTransform, - TriggerOwnHardCompaction, - Empty, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/flow/flow_time_wheel.rs b/src/domain/flow-system/services/src/flow/flow_time_wheel_service_impl.rs similarity index 56% rename from src/domain/flow-system/services/src/flow/flow_time_wheel.rs rename to src/domain/flow-system/services/src/flow/flow_time_wheel_service_impl.rs index a525d503a..ba8b313af 100644 --- a/src/domain/flow-system/services/src/flow/flow_time_wheel.rs +++ b/src/domain/flow-system/services/src/flow/flow_time_wheel_service_impl.rs @@ -9,19 +9,67 @@ use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap}; +use std::sync::{Arc, Mutex}; use chrono::{DateTime, Utc}; -use kamu_flow_system::FlowID; -use thiserror::Error; +use dill::{component, interface, scope, Singleton}; +use kamu_flow_system::{ + FlowID, + FlowTimeWheelService, + TimeWheelCancelActivationError, + TimeWheelFlowNotPlannedError, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct FlowTimeWheelServiceImpl { + 
state: Arc>, +} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Default)] -pub(crate) struct FlowTimeWheel { +struct State { flow_heap: BinaryHeap>, flow_activation_times_by_id: HashMap>, } +impl State { + fn is_flow_activation_planned_at( + &self, + flow_id: FlowID, + activation_moment: DateTime, + ) -> bool { + self.flow_activation_times_by_id + .get(&flow_id) + .is_some_and(|flow_activation_time| *flow_activation_time == activation_moment) + } + + fn clean_top_cancellations(&mut self) { + while let Some(ar) = self.flow_heap.peek() { + if self.is_flow_activation_planned_at(ar.0.flow_id, ar.0.activation_time) { + break; + } + + self.flow_heap.pop(); + } + } + + fn plan_flow(&mut self, flow_record: FlowRecord) { + self.flow_activation_times_by_id + .insert(flow_record.flow_id, flow_record.activation_time); + + self.flow_heap.push(Reverse(flow_record)); + } + + fn unplan_flow(&mut self, flow_id: FlowID) { + self.flow_activation_times_by_id.remove(&flow_id); + self.clean_top_cancellations(); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // TODO: assign a score, and use it as an ordering criteria for the tasks within // the same activation time #[derive(PartialEq, Eq, PartialOrd, Ord)] @@ -39,72 +87,83 @@ impl FlowRecord { } } -impl FlowTimeWheel { - pub fn nearest_activation_moment(&self) -> Option> { - self.flow_heap.peek().map(|ar| ar.0.activation_time) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn FlowTimeWheelService)] +#[scope(Singleton)] +impl FlowTimeWheelServiceImpl { + pub fn new() -> Self { + Self { + state: Arc::new(Mutex::new(State::default())), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl FlowTimeWheelService for FlowTimeWheelServiceImpl { + fn nearest_activation_moment(&self) -> Option> { + let guard = self.state.lock().unwrap(); + guard.flow_heap.peek().map(|ar| ar.0.activation_time) } - pub fn take_nearest_planned_flows(&mut self) -> Vec { - if self.flow_heap.is_empty() { + fn take_nearest_planned_flows(&self) -> Vec { + let mut guard = self.state.lock().unwrap(); + + if guard.flow_heap.is_empty() { vec![] } else { - let activation_moment = self.flow_heap.peek().unwrap().0.activation_time; + let activation_moment = guard.flow_heap.peek().unwrap().0.activation_time; let mut res: Vec<_> = Vec::new(); - while let Some(ar) = self.flow_heap.peek() { + while let Some(ar) = guard.flow_heap.peek() { if ar.0.activation_time > activation_moment { break; } - if self.is_flow_activation_planned_at(ar.0.flow_id, activation_moment) { + if guard.is_flow_activation_planned_at(ar.0.flow_id, activation_moment) { res.push(ar.0.flow_id); } - self.flow_heap.pop(); + guard.flow_heap.pop(); } - self.clean_top_cancellations(); + guard.clean_top_cancellations(); res } } - // TODO: maybe round activation time by a reasonable interval, like a minute, so - // that scoring logic might be inserted - pub fn activate_at(&mut self, activation_time: DateTime, flow_id: FlowID) { - match self.flow_activation_times_by_id.get(&flow_id) { + fn activate_at(&self, activation_time: DateTime, flow_id: FlowID) { + let mut guard = self.state.lock().unwrap(); + + match guard.flow_activation_times_by_id.get(&flow_id) { Some(earlier_activation_time) 
=> { if activation_time < *earlier_activation_time { - self.unplan_flow(flow_id); - self.plan_flow(FlowRecord::new(activation_time, flow_id)); + guard.unplan_flow(flow_id); + guard.plan_flow(FlowRecord::new(activation_time, flow_id)); } } None => { - self.plan_flow(FlowRecord::new(activation_time, flow_id)); + guard.plan_flow(FlowRecord::new(activation_time, flow_id)); } } } - pub fn get_planned_flow_activation_time(&self, flow_id: FlowID) -> Option> { - self.flow_activation_times_by_id.get(&flow_id).copied() + fn get_planned_flow_activation_time(&self, flow_id: FlowID) -> Option> { + let guard = self.state.lock().unwrap(); + guard.flow_activation_times_by_id.get(&flow_id).copied() } - fn is_flow_activation_planned_at( + fn cancel_flow_activation( &self, flow_id: FlowID, - activation_moment: DateTime, - ) -> bool { - self.flow_activation_times_by_id - .get(&flow_id) - .is_some_and(|flow_activation_time| *flow_activation_time == activation_moment) - } - - pub fn cancel_flow_activation( - &mut self, - flow_id: FlowID, ) -> Result<(), TimeWheelCancelActivationError> { - if self.flow_activation_times_by_id.contains_key(&flow_id) { - self.unplan_flow(flow_id); + let mut guard = self.state.lock().unwrap(); + + if guard.flow_activation_times_by_id.contains_key(&flow_id) { + guard.unplan_flow(flow_id); Ok(()) } else { Err(TimeWheelCancelActivationError::FlowNotPlanned( @@ -112,42 +171,6 @@ impl FlowTimeWheel { )) } } - - fn plan_flow(&mut self, flow_record: FlowRecord) { - self.flow_activation_times_by_id - .insert(flow_record.flow_id, flow_record.activation_time); - - self.flow_heap.push(Reverse(flow_record)); - } - - fn unplan_flow(&mut self, flow_id: FlowID) { - self.flow_activation_times_by_id.remove(&flow_id); - self.clean_top_cancellations(); - } - - fn clean_top_cancellations(&mut self) { - while let Some(ar) = self.flow_heap.peek() { - if self.is_flow_activation_planned_at(ar.0.flow_id, ar.0.activation_time) { - break; - } - - self.flow_heap.pop(); - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Error, Debug)] -pub(crate) enum TimeWheelCancelActivationError { - #[error(transparent)] - FlowNotPlanned(TimeWheelFlowNotPlannedError), -} - -#[derive(Error, Debug)] -#[error("Flow '{flow_id}' not found planned in the time wheel")] -pub(crate) struct TimeWheelFlowNotPlannedError { - flow_id: FlowID, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -166,7 +189,7 @@ mod tests { #[test] fn test_sequential_scheduling() { - let mut timewheel = FlowTimeWheel::default(); + let timewheel = FlowTimeWheelServiceImpl::new(); assert!(timewheel.nearest_activation_moment().is_none()); let now = Utc::now(); @@ -174,20 +197,20 @@ mod tests { let moment_2 = now + Duration::try_seconds(20).unwrap(); let moment_3 = now + Duration::try_seconds(30).unwrap(); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_1); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_2); - schedule_flow(&mut timewheel, moment_2, FLOW_ID_3); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_4); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_5); + schedule_flow(&timewheel, moment_1, FLOW_ID_1); + schedule_flow(&timewheel, moment_1, FLOW_ID_2); + schedule_flow(&timewheel, moment_2, FLOW_ID_3); + schedule_flow(&timewheel, moment_3, FLOW_ID_4); + schedule_flow(&timewheel, moment_3, FLOW_ID_5); - check_next_time_slot(&mut timewheel, moment_1, &[FLOW_ID_1, FLOW_ID_2]); - 
check_next_time_slot(&mut timewheel, moment_2, &[FLOW_ID_3]); - check_next_time_slot(&mut timewheel, moment_3, &[FLOW_ID_4, FLOW_ID_5]); + check_next_time_slot(&timewheel, moment_1, &[FLOW_ID_1, FLOW_ID_2]); + check_next_time_slot(&timewheel, moment_2, &[FLOW_ID_3]); + check_next_time_slot(&timewheel, moment_3, &[FLOW_ID_4, FLOW_ID_5]); } #[test] fn test_random_order_scheduling() { - let mut timewheel = FlowTimeWheel::default(); + let timewheel = FlowTimeWheelServiceImpl::new(); assert!(timewheel.nearest_activation_moment().is_none()); let now = Utc::now(); @@ -195,20 +218,20 @@ mod tests { let moment_2 = now + Duration::try_seconds(20).unwrap(); let moment_3 = now + Duration::try_seconds(30).unwrap(); - schedule_flow(&mut timewheel, moment_2, FLOW_ID_3); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_5); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_1); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_4); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_2); + schedule_flow(&timewheel, moment_2, FLOW_ID_3); + schedule_flow(&timewheel, moment_3, FLOW_ID_5); + schedule_flow(&timewheel, moment_1, FLOW_ID_1); + schedule_flow(&timewheel, moment_3, FLOW_ID_4); + schedule_flow(&timewheel, moment_1, FLOW_ID_2); - check_next_time_slot(&mut timewheel, moment_1, &[FLOW_ID_1, FLOW_ID_2]); - check_next_time_slot(&mut timewheel, moment_2, &[FLOW_ID_3]); - check_next_time_slot(&mut timewheel, moment_3, &[FLOW_ID_4, FLOW_ID_5]); + check_next_time_slot(&timewheel, moment_1, &[FLOW_ID_1, FLOW_ID_2]); + check_next_time_slot(&timewheel, moment_2, &[FLOW_ID_3]); + check_next_time_slot(&timewheel, moment_3, &[FLOW_ID_4, FLOW_ID_5]); } #[test] fn test_cancellations() { - let mut timewheel = FlowTimeWheel::default(); + let timewheel = FlowTimeWheelServiceImpl::new(); assert!(timewheel.nearest_activation_moment().is_none()); let now = Utc::now(); @@ -216,11 +239,11 @@ mod tests { let moment_2 = now + Duration::try_seconds(20).unwrap(); let moment_3 = now + Duration::try_seconds(30).unwrap(); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_1); - schedule_flow(&mut timewheel, moment_1, FLOW_ID_2); - schedule_flow(&mut timewheel, moment_2, FLOW_ID_3); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_4); - schedule_flow(&mut timewheel, moment_3, FLOW_ID_5); + schedule_flow(&timewheel, moment_1, FLOW_ID_1); + schedule_flow(&timewheel, moment_1, FLOW_ID_2); + schedule_flow(&timewheel, moment_2, FLOW_ID_3); + schedule_flow(&timewheel, moment_3, FLOW_ID_4); + schedule_flow(&timewheel, moment_3, FLOW_ID_5); timewheel .cancel_flow_activation(FlowID::new(FLOW_ID_1)) @@ -232,17 +255,17 @@ mod tests { .cancel_flow_activation(FlowID::new(FLOW_ID_5)) .unwrap(); - check_next_time_slot(&mut timewheel, moment_1, &[FLOW_ID_2]); - check_next_time_slot(&mut timewheel, moment_3, &[FLOW_ID_4]); + check_next_time_slot(&timewheel, moment_1, &[FLOW_ID_2]); + check_next_time_slot(&timewheel, moment_3, &[FLOW_ID_4]); assert!(timewheel.nearest_activation_moment().is_none()); } - fn schedule_flow(timewheel: &mut FlowTimeWheel, moment: DateTime, flow_id: u64) { + fn schedule_flow(timewheel: &dyn FlowTimeWheelService, moment: DateTime, flow_id: u64) { timewheel.activate_at(moment, FlowID::new(flow_id)); } fn check_next_time_slot( - timewheel: &mut FlowTimeWheel, + timewheel: &dyn FlowTimeWheelService, moment: DateTime, flow_ids: &[u64], ) { diff --git a/src/domain/flow-system/services/src/flow/mod.rs b/src/domain/flow-system/services/src/flow/mod.rs index 2f961c5b8..654600ec5 100644 --- 
a/src/domain/flow-system/services/src/flow/mod.rs +++ b/src/domain/flow-system/services/src/flow/mod.rs @@ -7,9 +7,14 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -mod active_configs_state; -mod flow_service_impl; -mod flow_time_wheel; -mod pending_flows_state; +mod flow_abort_helper; +mod flow_enqueue_helper; +mod flow_executor_impl; +mod flow_query_service_impl; +mod flow_time_wheel_service_impl; -pub use flow_service_impl::*; +pub(crate) use flow_abort_helper::*; +pub(crate) use flow_enqueue_helper::*; +pub use flow_executor_impl::*; +pub use flow_query_service_impl::*; +pub use flow_time_wheel_service_impl::*; diff --git a/src/domain/flow-system/services/src/flow/pending_flows_state.rs b/src/domain/flow-system/services/src/flow/pending_flows_state.rs deleted file mode 100644 index 1f2a41e3e..000000000 --- a/src/domain/flow-system/services/src/flow/pending_flows_state.rs +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::collections::HashMap; - -use kamu_flow_system::*; -use kamu_task_system::*; -use opendatafabric::DatasetID; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Default)] -pub(crate) struct PendingFlowsState { - pending_dataset_flows: HashMap, - pending_system_flows: HashMap, - pending_flows_by_tasks: HashMap, -} - -impl PendingFlowsState { - pub fn add_pending_flow(&mut self, flow_key: FlowKey, flow_id: FlowID) { - match flow_key { - FlowKey::Dataset(flow_key) => { - self.pending_dataset_flows.insert(flow_key, flow_id); - } - FlowKey::System(flow_key) => { - self.pending_system_flows - .insert(flow_key.flow_type, flow_id); - } - } - } - - pub fn track_flow_task(&mut self, flow_id: FlowID, task_id: TaskID) { - self.pending_flows_by_tasks.insert(task_id, flow_id); - } - - pub fn drop_pending_flow(&mut self, flow_key: &FlowKey) -> Option { - match flow_key { - FlowKey::Dataset(flow_key) => { - self.drop_dataset_pending_flow(&flow_key.dataset_id, flow_key.flow_type) - } - FlowKey::System(flow_key) => self.pending_system_flows.remove(&flow_key.flow_type), - } - } - - pub fn drop_dataset_pending_flow( - &mut self, - dataset_id: &DatasetID, - flow_type: DatasetFlowType, - ) -> Option { - self.pending_dataset_flows - .remove(BorrowedFlowKeyDataset::new(dataset_id, flow_type).as_trait()) - } - - pub fn untrack_flow_by_task(&mut self, task_id: TaskID) { - self.pending_flows_by_tasks.remove(&task_id); - } - - pub fn try_get_pending_flow(&self, flow_key: &FlowKey) -> Option { - match flow_key { - FlowKey::Dataset(flow_key) => self - .pending_dataset_flows - .get( - BorrowedFlowKeyDataset::new(&flow_key.dataset_id, flow_key.flow_type) - .as_trait(), - ) - .copied(), - FlowKey::System(flow_key) => { - self.pending_system_flows.get(&flow_key.flow_type).copied() - } - } - } - - pub fn try_get_flow_id_by_task(&self, task_id: TaskID) -> Option { - self.pending_flows_by_tasks.get(&task_id).copied() - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git 
a/src/domain/flow-system/services/src/flow_configuration/flow_configuration_service_impl.rs b/src/domain/flow-system/services/src/flow_configuration/flow_configuration_service_impl.rs index 40f50611e..9ca6453e7 100644 --- a/src/domain/flow-system/services/src/flow_configuration/flow_configuration_service_impl.rs +++ b/src/domain/flow-system/services/src/flow_configuration/flow_configuration_service_impl.rs @@ -203,7 +203,7 @@ impl FlowConfigurationService for FlowConfigurationServiceImpl { } } - let dataset_ids: Vec<_> = self.event_store.list_all_dataset_ids().await.try_collect().await?; + let dataset_ids: Vec<_> = self.event_store.list_all_dataset_ids().try_collect().await?; for dataset_id in dataset_ids { for dataset_flow_type in DatasetFlowType::all() { diff --git a/src/domain/flow-system/services/src/lib.rs b/src/domain/flow-system/services/src/lib.rs index 31fdf7e86..1a360a0e7 100644 --- a/src/domain/flow-system/services/src/lib.rs +++ b/src/domain/flow-system/services/src/lib.rs @@ -12,10 +12,12 @@ // Re-exports pub use kamu_flow_system as domain; +mod dependencies; mod flow; mod flow_configuration; mod messages; +pub use dependencies::*; pub use flow::*; pub use flow_configuration::*; pub use messages::*; diff --git a/src/domain/flow-system/services/src/messages/flow_message_consumers.rs b/src/domain/flow-system/services/src/messages/flow_message_consumers.rs index fa31cce4b..64eb11480 100644 --- a/src/domain/flow-system/services/src/messages/flow_message_consumers.rs +++ b/src/domain/flow-system/services/src/messages/flow_message_consumers.rs @@ -12,6 +12,6 @@ pub const MESSAGE_CONSUMER_KAMU_FLOW_CONFIGURATION_SERVICE: &str = "dev.kamu.domain.flow-system.FlowConfigurationService"; -pub const MESSAGE_CONSUMER_KAMU_FLOW_SERVICE: &str = "dev.kamu.domain.flow-system.FlowService"; +pub const MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR: &str = "dev.kamu.domain.flow-system.FlowExecutor"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/messages/flow_message_producers.rs b/src/domain/flow-system/services/src/messages/flow_message_producers.rs index f87d196c3..b1dcecd92 100644 --- a/src/domain/flow-system/services/src/messages/flow_message_producers.rs +++ b/src/domain/flow-system/services/src/messages/flow_message_producers.rs @@ -12,6 +12,6 @@ pub const MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE: &str = "dev.kamu.domain.flow-system.FlowConfigurationService"; -pub const MESSAGE_PRODUCER_KAMU_FLOW_SERVICE: &str = "dev.kamu.domain.flow-system.FlowService"; +pub const MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR: &str = "dev.kamu.domain.flow-system.FlowExecutor"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/tests/tests/mod.rs b/src/domain/flow-system/services/tests/tests/mod.rs index b79e473d5..c3a4c5e76 100644 --- a/src/domain/flow-system/services/tests/tests/mod.rs +++ b/src/domain/flow-system/services/tests/tests/mod.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. 
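
For context on the `list_all_dataset_ids()` change above: the flow configuration event store now hands back a stream of dataset IDs directly rather than a future that resolves to one, so the extra `.await` before `try_collect()` disappears. Below is a minimal sketch of that consumption pattern, assuming a `futures`-style boxed stream; the `DatasetIDStream` alias, string IDs, and `std::io::Error` error type are illustrative stand-ins, not the crate's actual signatures.

```rust
use futures::stream::{self, BoxStream, TryStreamExt};

// Hypothetical stand-ins for the real event-store types (illustrative only).
type DatasetID = String;
type ListError = std::io::Error;
type DatasetIDStream = BoxStream<'static, Result<DatasetID, ListError>>;

// Fakes the event-store query; the real store streams IDs from its database.
fn list_all_dataset_ids() -> DatasetIDStream {
    let ids: Vec<Result<DatasetID, ListError>> = vec![
        Ok("did:odf:example-foo".to_string()),
        Ok("did:odf:example-bar".to_string()),
    ];
    Box::pin(stream::iter(ids))
}

#[tokio::main]
async fn main() -> Result<(), ListError> {
    // A single `.try_collect().await` consumes the stream; no extra `.await`
    // is needed to obtain the stream itself anymore.
    let dataset_ids: Vec<DatasetID> = list_all_dataset_ids().try_collect().await?;
    assert_eq!(dataset_ids.len(), 2);
    Ok(())
}
```

Collecting with one `try_collect().await?` keeps error propagation the same while removing the nested await that the old signature required.
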
mod test_flow_configuration_service_impl; -mod test_flow_service_impl; +mod test_flow_executor_impl; mod utils; pub(crate) use utils::*; diff --git a/src/domain/flow-system/services/tests/tests/test_flow_service_impl.rs b/src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs similarity index 93% rename from src/domain/flow-system/services/tests/tests/test_flow_service_impl.rs rename to src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs index a579d3d78..88989ea63 100644 --- a/src/domain/flow-system/services/tests/tests/test_flow_service_impl.rs +++ b/src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs @@ -61,15 +61,17 @@ async fn test_read_initial_config_and_queue_without_waiting() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: start running at 10ms, finish at 20ms let foo_task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -83,6 +85,7 @@ async fn test_read_initial_config_and_queue_without_waiting() { // Task 1: start running at 90ms, finish at 100ms let foo_task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(90).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -152,6 +155,139 @@ async fn test_read_initial_config_and_queue_without_waiting() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[test_log::test(tokio::test)] +async fn test_read_initial_config_shouldnt_queue_in_recovery_case() { + let harness = FlowHarness::new().await; + + // Create a "foo" root dataset + let foo_create_result = harness + .create_root_dataset(DatasetAlias { + dataset_name: DatasetName::new_unchecked("foo"), + account_name: None, + }) + .await; + let foo_id = foo_create_result.dataset_handle.id; + let foo_flow_key = FlowKey::dataset(foo_id.clone(), DatasetFlowType::Ingest); + + // Remember start time + let start_time = harness + .now_datetime() + .duration_round(Duration::try_milliseconds(SCHEDULING_ALIGNMENT_MS).unwrap()) + .unwrap(); + + // Configure ingestion schedule every 60ms, but use event store directly + harness + .flow_configuration_event_store + .save_events( + &foo_flow_key, + vec![FlowConfigurationEventCreated { + event_time: start_time, + flow_key: foo_flow_key.clone(), + paused: false, + rule: FlowConfigurationRule::IngestRule(IngestRule { + fetch_uncacheable: false, + schedule_condition: Duration::try_milliseconds(60).unwrap().into(), + }), + } + .into()], + ) + .await + .unwrap(); + + harness.eager_initialization().await; + + // Mimic we are recovering from server restart, where a waiting flow for "foo" + // existed already + let flow_id = harness.flow_event_store.new_flow_id().await.unwrap(); + harness + .flow_event_store + .save_events( + 
&flow_id, + vec![ + FlowEventInitiated { + event_time: start_time, + flow_id, + flow_key: foo_flow_key.clone(), + trigger: FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: start_time, + }), + config_snapshot: None, + } + .into(), + FlowEventStartConditionUpdated { + event_time: Utc::now(), + flow_id, + start_condition: FlowStartCondition::Schedule(FlowStartConditionSchedule { + wake_up_at: start_time + Duration::try_milliseconds(100).unwrap(), + }), + last_trigger_index: 0, + } + .into(), + ], + ) + .await + .unwrap(); + + // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); + tokio::select! { + // Run API service + res = harness.flow_service.run() => res.int_err(), + + // Run simulation script and task drivers + _ = async { + // Task 0: start running at 110ms, finish at 120ms + let foo_task0_driver = harness.task_driver(TaskDriverArgs { + task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), + dataset_id: Some(foo_id.clone()), + run_since_start: Duration::try_milliseconds(110).unwrap(), + finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), + expected_logical_plan: LogicalPlan::UpdateDataset(UpdateDataset { + dataset_id: foo_id.clone(), + fetch_uncacheable: false + }), + }); + let foo_task0_handle = foo_task0_driver.run(); + + // Main simulation boundary - 130ms total + let sim_handle = harness.advance_time(Duration::try_milliseconds(130).unwrap()); + tokio::join!(foo_task0_handle, sim_handle) + } => Ok(()) + } + .unwrap(); + + let test_flow_listener = harness.catalog.get_one::().unwrap(); + test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); + + pretty_assertions::assert_eq!( + format!("{}", test_flow_listener.as_ref()), + indoc::indoc!( + r#" + #0: +0ms: + "foo" Ingest: + Flow ID = 0 Waiting AutoPolling Schedule(wakeup=100ms) + + #1: +100ms: + "foo" Ingest: + Flow ID = 0 Waiting AutoPolling Executor(task=0, since=100ms) + + #2: +110ms: + "foo" Ingest: + Flow ID = 0 Running(task=0) + + #3: +120ms: + "foo" Ingest: + Flow ID = 1 Waiting AutoPolling Schedule(wakeup=180ms) + Flow ID = 0 Finished Success + + "# + ) + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_cron_config() { // Note: this test runs with 1s step, CRON does not apply to milliseconds @@ -171,20 +307,6 @@ async fn test_cron_config() { .await; let foo_id = foo_create_result.dataset_handle.id; - harness - .set_dataset_flow_ingest( - harness.now_datetime(), - foo_id.clone(), - DatasetFlowType::Ingest, - IngestRule { - fetch_uncacheable: false, - schedule_condition: Schedule::Cron(ScheduleCron { - source_5component_cron_expression: String::from(""), - cron_schedule: cron::Schedule::from_str("*/5 * * * * *").unwrap(), - }), - }, - ) - .await; harness.eager_initialization().await; // Remember start time @@ -193,16 +315,21 @@ async fn test_cron_config() { .duration_round(Duration::try_seconds(1).unwrap()) .unwrap(); + let test_flow_listener = harness.catalog.get_one::().unwrap(); + test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); + // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: start running at 6s, finish at 7s let foo_task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_seconds(6).unwrap(), finish_in_with: Some((Duration::try_seconds(1).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -213,38 +340,63 @@ async fn test_cron_config() { }); let foo_task0_handle = foo_task0_driver.run(); - // Main simulation boundary - 12s total: at 10s 2nd scheduling happens - let sim_handle = harness.advance_time_custom_alignment(Duration::try_seconds(1).unwrap(), Duration::try_seconds(12).unwrap()); - tokio::join!(foo_task0_handle, sim_handle) + // Main simulation script + let main_handle = async { + // Wait 2 s + harness.advance_time_custom_alignment(Duration::try_seconds(1).unwrap(), Duration::try_seconds(2).unwrap()).await; + + // Enable CRON config (we are skipping moment 0s) + harness + .set_dataset_flow_ingest( + harness.now_datetime(), + foo_id.clone(), + DatasetFlowType::Ingest, + IngestRule { + fetch_uncacheable: false, + schedule_condition: Schedule::Cron(ScheduleCron { + source_5component_cron_expression: String::from(""), + cron_schedule: cron::Schedule::from_str("*/5 * * * * *").unwrap(), + }), + }, + ) + .await; + test_flow_listener + .make_a_snapshot(start_time + Duration::try_seconds(1).unwrap()) + .await; + + // Main simulation boundary - 12s total: at 10s 2nd scheduling happens; + harness.advance_time_custom_alignment(Duration::try_seconds(1).unwrap(), Duration::try_seconds(11).unwrap()).await; + }; + + tokio::join!(foo_task0_handle, main_handle) } => Ok(()) } .unwrap(); - let test_flow_listener = harness.catalog.get_one::().unwrap(); - test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); - pretty_assertions::assert_eq!( format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: + + #1: +1000ms: "foo" Ingest: Flow ID = 0 Waiting AutoPolling Schedule(wakeup=5000ms) - #1: +5000ms: + #2: +5000ms: "foo" Ingest: Flow ID = 0 Waiting AutoPolling Executor(task=0, since=5000ms) - #2: +6000ms: + #3: +6000ms: "foo" Ingest: Flow ID = 0 Running(task=0) - #3: +7000ms: + #4: +7000ms: "foo" Ingest: Flow ID = 1 Waiting AutoPolling Schedule(wakeup=10000ms) Flow ID = 0 Finished Success - #4: +10000ms: + #5: +10000ms: "foo" Ingest: Flow ID = 1 Waiting AutoPolling Executor(task=1, since=10000ms) Flow ID = 0 Finished Success @@ -304,15 +456,17 @@ async fn test_manual_trigger() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -326,6 +480,7 @@ async fn test_manual_trigger() { // Task 1: "for" start running at 60ms, finish at 70ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(60).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -339,6 +494,7 @@ async fn test_manual_trigger() { // Task 2: "bar" start running at 100ms, finish at 110ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(100).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -518,15 +674,17 @@ async fn test_ingest_trigger_with_ingest_config() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -540,6 +698,7 @@ async fn test_ingest_trigger_with_ingest_config() { // Task 1: "for" start running at 60ms, finish at 70ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(60).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -553,6 +712,7 @@ async fn test_ingest_trigger_with_ingest_config() { // Task 2: "bar" start running at 100ms, finish at 110ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(100).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -722,15 +882,17 @@ async fn test_manual_trigger_compaction() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(20).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -745,6 +907,7 @@ async fn test_manual_trigger_compaction() { let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(60).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -887,15 +1050,17 @@ async fn test_manual_trigger_reset() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 20ms, finish at 110ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(create_dataset_result.dataset_handle.id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(90).unwrap(), TaskOutcome::Success(TaskResult::ResetDatasetResult(TaskResetDatasetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") })))), @@ -1027,9 +1192,10 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1043,6 +1209,7 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { // Task 0: "foo" start running at 20ms, finish at 90ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(create_foo_result.dataset_handle.id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(70).unwrap(), TaskOutcome::Success(TaskResult::ResetDatasetResult(TaskResetDatasetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") })))), @@ -1058,6 +1225,7 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { // Task 1: "foo_bar" start running at 110ms, finish at 180sms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_baz_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), finish_in_with: Some( @@ -1085,6 +1253,7 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { // Task 2: "foo_bar_baz" start running at 200ms, finish at 240ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_bar_id.clone()), run_since_start: Duration::try_milliseconds(200).unwrap(), finish_in_with: Some( @@ -1228,15 +1397,17 @@ async fn test_manual_trigger_compaction_with_config() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 30ms, finish at 40ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(30).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -1291,6 +1462,8 @@ async fn test_manual_trigger_compaction_with_config() { ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivatives() { let max_slice_size = 1_000_000u64; @@ -1352,9 +1525,10 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1368,6 +1542,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati // Task 0: "foo" start running at 20ms, finish at 90ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some( @@ -1395,6 +1570,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati // Task 1: "foo_bar" start running at 110ms, finish at 180sms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_baz_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), finish_in_with: Some( @@ -1422,6 +1598,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati // Task 2: "foo_bar_baz" start running at 200ms, finish at 240ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_bar_id.clone()), run_since_start: Duration::try_milliseconds(200).unwrap(), finish_in_with: Some( @@ -1583,9 +1760,10 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1599,6 +1777,7 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { // Task 0: "foo" start running at 20ms, finish at 90ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some( @@ -1626,6 +1805,7 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { // Task 1: "foo_bar" start running at 110ms, finish at 180sms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_bar_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), finish_in_with: Some( @@ -1653,6 +1833,7 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { // Task 2: "foo_bar_baz" start running at 200ms, finish at 240ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_bar_baz_id.clone()), run_since_start: Duration::try_milliseconds(200).unwrap(), finish_in_with: Some( @@ -1816,9 +1997,10 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1832,6 +2014,7 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { // Task 0: "foo" start running at 20ms, finish at 90ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some( @@ -1956,15 +2139,17 @@ async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 80ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(70).unwrap(), TaskOutcome::Success(TaskResult::CompactionDatasetResult(TaskCompactionDatasetResult { @@ -1993,6 +2178,7 @@ async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { // Task 1: "foo_bar" hard_compaction start running at 110ms, finish at 180ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(foo_bar_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), // Send some PullResult with records to bypass batching condition @@ -2123,15 +2309,17 @@ async fn test_dataset_flow_configuration_paused_resumed_modified() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2145,6 +2333,7 @@ async fn test_dataset_flow_configuration_paused_resumed_modified() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2348,15 +2537,17 @@ async fn test_respect_last_success_time_when_schedule_resumes() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2370,6 +2561,7 @@ async fn test_respect_last_success_time_when_schedule_resumes() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2568,15 +2760,17 @@ async fn test_dataset_deleted() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2590,6 +2784,7 @@ async fn test_dataset_deleted() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2767,15 +2962,17 @@ async fn test_task_completions_trigger_next_loop_on_success() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -2789,6 +2986,7 @@ async fn test_task_completions_trigger_next_loop_on_success() { // Task 1: "bar" start running at 20ms, finish at 30ms with failure let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Failed(TaskError::Empty))), @@ -2802,6 +3000,7 @@ async fn test_task_completions_trigger_next_loop_on_success() { // Task 1: "baz" start running at 30ms, finish at 40ms with cancellation let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(baz_id.clone()), run_since_start: Duration::try_milliseconds(30).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Cancelled)), @@ -2995,15 +3194,17 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -3017,6 +3218,7 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), // Send some PullResult with records to bypass batching condition @@ -3036,6 +3238,7 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { // Task 2: "foo" start running at 110ms, finish at 120ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), // Send some PullResult with records to bypass batching condition @@ -3055,6 +3258,7 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { // Task 3: "bar" start running at 130ms, finish at 140ms let task3_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(3), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(130).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -3221,9 +3425,10 @@ async fn test_throttling_manual_triggers() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -3254,6 +3459,7 @@ async fn test_throttling_manual_triggers() { // Task 0: "foo" start running at 40ms, finish at 50ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(40).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -3406,15 +3612,17 @@ async fn test_throttling_derived_dataset_with_2_parents() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -3433,6 +3641,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -3451,6 +3660,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Task 2: "baz" start running at 30ms, finish at 50ms (simulate longer run) let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(baz_id.clone()), run_since_start: Duration::try_milliseconds(30).unwrap(), finish_in_with: Some((Duration::try_milliseconds(20).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -3464,6 +3674,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Task 3: "foo" start running at 130ms, finish at 140ms let task3_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(3), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(130).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -3482,6 +3693,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Task 4: "baz" start running at 160ms, finish at 170ms let task4_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(4), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "5")]), dataset_id: Some(baz_id.clone()), run_since_start: Duration::try_milliseconds(160).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -3495,6 +3707,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Task 5: "bar" start running at 190ms, finish at 200ms let task5_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(5), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "4")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(190).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -3873,15 +4086,17 @@ async fn test_batching_condition_records_reached() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -3900,6 +4115,7 @@ async fn test_batching_condition_records_reached() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -3918,6 +4134,7 @@ async fn test_batching_condition_records_reached() { // Task 2: "foo" start running at 80ms, finish at 90ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(80).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -3936,6 +4153,7 @@ async fn test_batching_condition_records_reached() { // Task 3: "foo" start running at 150ms, finish at 160ms let task3_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(3), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "4")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(150).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -3954,6 +4172,7 @@ async fn test_batching_condition_records_reached() { // Task 4: "bar" start running at 170ms, finish at 180ms let task4_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(4), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(170).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4197,15 +4416,17 @@ async fn test_batching_condition_timeout() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4224,6 +4445,7 @@ async fn test_batching_condition_timeout() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4242,6 +4464,7 @@ async fn test_batching_condition_timeout() { // Task 2: "foo" start running at 80ms, finish at 90ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(80).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4262,6 +4485,7 @@ async fn test_batching_condition_timeout() { // Task 4: "bar" start running at 250ms, finish at 2560ms let task4_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(4), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(250).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4473,15 +4697,17 @@ async fn test_batching_condition_watermark() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4500,6 +4726,7 @@ async fn test_batching_condition_watermark() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4518,6 +4745,7 @@ async fn test_batching_condition_watermark() { // Task 2: "foo" start running at 70ms, finish at 80ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(70).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4538,6 +4766,7 @@ async fn test_batching_condition_watermark() { // Task 4: "bar" start running at 290ms, finish at 300ms let task4_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(4), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(290).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4819,15 +5048,17 @@ async fn test_batching_condition_with_2_inputs() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4846,6 +5077,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 1: "bar" start running at 20ms, finish at 30ms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(20).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4864,6 +5096,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 2: "baz" start running at 30ms, finish at 40ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), dataset_id: Some(baz_id.clone()), run_since_start: Duration::try_milliseconds(30).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4882,6 +5115,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 3: "foo" start running at 110ms, finish at 120ms let task3_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(3), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "3")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(110).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4900,6 +5134,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 4: "bar" start running at 160ms, finish at 170ms let task4_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(4), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "4")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(160).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -4918,6 +5153,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 5: "foo" start running at 210ms, finish at 220ms let task5_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(5), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "6")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(210).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult { @@ -4936,6 +5172,7 @@ async fn test_batching_condition_with_2_inputs() { // Task 6: "baz" start running at 230ms, finish at 240ms let task6_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(6), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "5")]), dataset_id: Some(baz_id.clone()), 
run_since_start: Duration::try_milliseconds(230).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::UpdateDatasetResult(TaskUpdateDatasetResult{ @@ -5282,15 +5519,17 @@ async fn test_list_all_flow_initiators() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! { // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(20).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -5305,6 +5544,7 @@ async fn test_list_all_flow_initiators() { let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(60).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -5351,7 +5591,7 @@ async fn test_list_all_flow_initiators() { .unwrap(); let foo_dataset_initiators_list: Vec<_> = harness - .flow_service + .flow_query_service .list_all_flow_initiators_by_dataset(&foo_id) .await .unwrap() @@ -5363,7 +5603,7 @@ async fn test_list_all_flow_initiators() { assert_eq!(foo_dataset_initiators_list, [foo_account_id.clone()]); let bar_dataset_initiators_list: Vec<_> = harness - .flow_service + .flow_query_service .list_all_flow_initiators_by_dataset(&bar_id) .await .unwrap() @@ -5444,15 +5684,17 @@ async fn test_list_all_datasets_with_flow() { .unwrap(); // Run scheduler concurrently with manual triggers script + harness.flow_service.pre_run(start_time).await.unwrap(); tokio::select! 
{ // Run API service - res = harness.flow_service.run(start_time) => res.int_err(), + res = harness.flow_service.run() => res.int_err(), // Run simulation script and task drivers _ = async { // Task 0: "foo" start running at 10ms, finish at 20ms let task0_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(0), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(foo_id.clone()), run_since_start: Duration::try_milliseconds(10).unwrap(), finish_in_with: Some((Duration::try_milliseconds(20).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -5467,6 +5709,7 @@ async fn test_list_all_datasets_with_flow() { let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), + task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), dataset_id: Some(bar_id.clone()), run_since_start: Duration::try_milliseconds(60).unwrap(), finish_in_with: Some((Duration::try_milliseconds(10).unwrap(), TaskOutcome::Success(TaskResult::Empty))), @@ -5513,7 +5756,7 @@ async fn test_list_all_datasets_with_flow() { .unwrap(); let foo_dataset_initiators_list: Vec<_> = harness - .flow_service + .flow_query_service .list_all_flow_initiators_by_dataset(&foo_id) .await .unwrap() @@ -5525,7 +5768,7 @@ async fn test_list_all_datasets_with_flow() { assert_eq!(foo_dataset_initiators_list, [foo_account_id.clone()]); let bar_dataset_initiators_list: Vec<_> = harness - .flow_service + .flow_query_service .list_all_flow_initiators_by_dataset(&bar_id) .await .unwrap() @@ -5537,7 +5780,7 @@ async fn test_list_all_datasets_with_flow() { assert_eq!(bar_dataset_initiators_list, [bar_account_id.clone()]); let all_datasets_with_flow: Vec<_> = harness - .flow_service + .flow_query_service .list_all_datasets_with_flow_by_account(&foo_account_id) .await .unwrap() @@ -5549,7 +5792,7 @@ async fn test_list_all_datasets_with_flow() { assert_eq!(all_datasets_with_flow, [foo_id]); let all_datasets_with_flow: Vec<_> = harness - .flow_service + .flow_query_service .list_all_datasets_with_flow_by_account(&bar_account_id) .await .unwrap() diff --git a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs index bb447ea02..0c308c0f9 100644 --- a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs +++ b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs @@ -33,7 +33,7 @@ use kamu_flow_system::*; use kamu_flow_system_inmem::*; use kamu_flow_system_services::*; use kamu_task_system::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR}; -use kamu_task_system_inmem::InMemoryTaskSystemEventStore; +use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; use opendatafabric::*; @@ -60,7 +60,10 @@ pub(crate) struct FlowHarness { pub catalog: dill::Catalog, pub dataset_repo: Arc, pub flow_configuration_service: Arc, - pub flow_service: Arc, + pub flow_configuration_event_store: Arc, + pub flow_service: Arc, + pub flow_query_service: Arc, + pub flow_event_store: Arc, pub auth_svc: Arc, pub fake_system_time_source: FakeSystemTimeSource, } @@ -118,13 +121,11 @@ impl FlowHarness { ) .bind::() .add::() - .add_value(FlowServiceRunConfig::new( + .add_value(FlowExecutorConfig::new( awaiting_step, mandatory_throttling_period, )) - .add::() .add::() - .add::() .add::() .add_value(fake_system_time_source.clone()) .bind::() @@ 
-151,12 +152,13 @@ impl FlowHarness { .add::() .add::() .add::() - .add::() + .add::() .add::() .add::() .add::(); NoOpDatabasePlugin::init_database_components(&mut b); + kamu_flow_system_services::register_dependencies(&mut b); register_message_dispatcher::( &mut b, @@ -170,9 +172,9 @@ impl FlowHarness { &mut b, MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, ); - register_message_dispatcher::( + register_message_dispatcher::( &mut b, - MESSAGE_PRODUCER_KAMU_FLOW_SERVICE, + MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, ); b.build() @@ -191,8 +193,13 @@ impl FlowHarness { .await .unwrap(); - let flow_service = catalog.get_one::().unwrap(); + let flow_service = catalog.get_one::().unwrap(); + let flow_query_service = catalog.get_one::().unwrap(); let flow_configuration_service = catalog.get_one::().unwrap(); + let flow_configuration_event_store = catalog + .get_one::() + .unwrap(); + let flow_event_store = catalog.get_one::().unwrap(); let dataset_repo = catalog.get_one::().unwrap(); let auth_svc = catalog.get_one::().unwrap(); @@ -200,7 +207,10 @@ impl FlowHarness { _tmp_dir: tmp_dir, catalog, flow_service, + flow_query_service, flow_configuration_service, + flow_configuration_event_store, + flow_event_store, dataset_repo, fake_system_time_source, auth_svc, @@ -414,11 +424,7 @@ impl FlowHarness { &self, args: ManualFlowTriggerArgs, ) -> ManualFlowTriggerDriver { - ManualFlowTriggerDriver::new( - self.catalog.get_one().unwrap(), - self.catalog.get_one().unwrap(), - args, - ) + ManualFlowTriggerDriver::new(self.catalog.clone(), self.catalog.get_one().unwrap(), args) } pub fn now_datetime(&self) -> DateTime { diff --git a/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs b/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs index 68cf30671..71d92c8ee 100644 --- a/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs +++ b/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs @@ -11,10 +11,11 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use chrono::{DateTime, Utc}; +use database_common::PaginationOpts; use dill::*; use internal_error::InternalError; use kamu_flow_system::*; -use kamu_flow_system_services::MESSAGE_PRODUCER_KAMU_FLOW_SERVICE; +use kamu_flow_system_services::MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR; use messaging_outbox::{ MessageConsumer, MessageConsumerMeta, @@ -27,7 +28,7 @@ use time_source::FakeSystemTimeSource; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) struct FlowSystemTestListener { - flow_service: Arc, + flow_query_service: Arc, fake_time_source: Arc, state: Arc>, } @@ -43,19 +44,19 @@ struct FlowSystemTestListenerState { #[component(pub)] #[scope(Singleton)] #[interface(dyn MessageConsumer)] -#[interface(dyn MessageConsumerT)] +#[interface(dyn MessageConsumerT)] #[meta(MessageConsumerMeta { consumer_name: "FlowSystemTestListener", - feeding_producers: &[MESSAGE_PRODUCER_KAMU_FLOW_SERVICE], + feeding_producers: &[MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR], durability: MessageConsumptionDurability::BestEffort, })] impl FlowSystemTestListener { pub(crate) fn new( - flow_service: Arc, + flow_query_service: Arc, fake_time_source: Arc, ) -> Self { Self { - flow_service, + flow_query_service, fake_time_source, state: Arc::new(Mutex::new(FlowSystemTestListenerState::default())), } @@ -64,8 +65,8 @@ impl FlowSystemTestListener { pub(crate) async fn make_a_snapshot(&self, update_time: 
DateTime) { use futures::TryStreamExt; let flows: Vec<_> = self - .flow_service - .list_all_flows(FlowPaginationOpts { + .flow_query_service + .list_all_flows(PaginationOpts { limit: 100, offset: 0, }) @@ -235,11 +236,11 @@ impl std::fmt::Display for FlowSystemTestListener { impl MessageConsumer for FlowSystemTestListener {} #[async_trait::async_trait] -impl MessageConsumerT for FlowSystemTestListener { +impl MessageConsumerT for FlowSystemTestListener { async fn consume_message( &self, _: &Catalog, - message: &FlowServiceUpdatedMessage, + message: &FlowExecutorUpdatedMessage, ) -> Result<(), InternalError> { self.make_a_snapshot(message.update_time).await; Ok(()) diff --git a/src/domain/flow-system/services/tests/tests/utils/manual_flow_trigger_driver.rs b/src/domain/flow-system/services/tests/tests/utils/manual_flow_trigger_driver.rs index 219f606f0..89c52df5e 100644 --- a/src/domain/flow-system/services/tests/tests/utils/manual_flow_trigger_driver.rs +++ b/src/domain/flow-system/services/tests/tests/utils/manual_flow_trigger_driver.rs @@ -10,16 +10,18 @@ use std::sync::Arc; use chrono::Duration; +use database_common::DatabaseTransactionRunner; +use dill::Catalog; use kamu_accounts::DEFAULT_ACCOUNT_ID; -use kamu_flow_system::{FlowKey, FlowService}; +use kamu_flow_system::{FlowKey, FlowQueryService}; use opendatafabric::AccountID; use time_source::SystemTimeSource; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) struct ManualFlowTriggerDriver { + catalog: Catalog, time_source: Arc, - flow_service: Arc, args: ManualFlowTriggerArgs, } @@ -31,13 +33,13 @@ pub(crate) struct ManualFlowTriggerArgs { impl ManualFlowTriggerDriver { pub(crate) fn new( + catalog: Catalog, time_source: Arc, - flow_service: Arc, args: ManualFlowTriggerArgs, ) -> Self { Self { + catalog, time_source, - flow_service, args, } } @@ -47,13 +49,20 @@ impl ManualFlowTriggerDriver { self.time_source.sleep(self.args.run_since_start).await; - self.flow_service - .trigger_manual_flow( - start_time + self.args.run_since_start, - self.args.flow_key, - self.args.initiator_id.unwrap_or(DEFAULT_ACCOUNT_ID.clone()), - None, - ) + DatabaseTransactionRunner::new(self.catalog.clone()) + .transactional(|transactional_catalog| async move { + let flow_query_service = transactional_catalog + .get_one::() + .unwrap(); + flow_query_service + .trigger_manual_flow( + start_time + self.args.run_since_start, + self.args.flow_key, + self.args.initiator_id.unwrap_or(DEFAULT_ACCOUNT_ID.clone()), + None, + ) + .await + }) .await .unwrap(); } diff --git a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs index a9e571b1d..cf4032f8b 100644 --- a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs +++ b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs @@ -21,12 +21,13 @@ use tokio::task::yield_now; pub(crate) struct TaskDriver { time_source: Arc, outbox: Arc, - task_event_store: Arc, + task_event_store: Arc, args: TaskDriverArgs, } pub(crate) struct TaskDriverArgs { pub(crate) task_id: TaskID, + pub(crate) task_metadata: TaskMetadata, pub(crate) dataset_id: Option, pub(crate) run_since_start: Duration, pub(crate) finish_in_with: Option<(Duration, TaskOutcome)>, @@ -37,7 +38,7 @@ impl TaskDriver { pub(crate) fn new( time_source: Arc, outbox: Arc, - task_event_store: Arc, + task_event_store: Arc, args: TaskDriverArgs, ) -> Self { Self { @@ -66,6 +67,7 @@ impl 
TaskDriver { TaskProgressMessage::running( start_time + self.args.run_since_start, self.args.task_id, + self.args.task_metadata.clone(), ), ) .await @@ -82,6 +84,7 @@ impl TaskDriver { TaskProgressMessage::finished( start_time + self.args.run_since_start + finish_in, self.args.task_id, + self.args.task_metadata, with_outcome, ), ) diff --git a/src/domain/task-system/domain/Cargo.toml b/src/domain/task-system/domain/Cargo.toml index 181061cd5..e8ac01fdd 100644 --- a/src/domain/task-system/domain/Cargo.toml +++ b/src/domain/task-system/domain/Cargo.toml @@ -22,6 +22,7 @@ doctest = false [dependencies] +database-common = { workspace = true } enum-variants = { workspace = true } event-sourcing = { workspace = true } messaging-outbox = { workspace = true } @@ -32,5 +33,6 @@ kamu-core = { workspace = true } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } serde = { version = "1", features = ["derive"] } +sqlx = { version = "0.8", default-features = false, features = ["macros"] } thiserror = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } diff --git a/src/domain/task-system/domain/src/aggregates/task.rs b/src/domain/task-system/domain/src/aggregates/task.rs index 1b9e42b74..a825f1aee 100644 --- a/src/domain/task-system/domain/src/aggregates/task.rs +++ b/src/domain/task-system/domain/src/aggregates/task.rs @@ -15,11 +15,16 @@ use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Aggregate, Debug)] -pub struct Task(Aggregate); +pub struct Task(Aggregate); impl Task { /// Creates a task with a pending `TaskCreated` event - pub fn new(now: DateTime, task_id: TaskID, logical_plan: LogicalPlan) -> Self { + pub fn new( + now: DateTime, + task_id: TaskID, + logical_plan: LogicalPlan, + metadata: Option, + ) -> Self { Self( Aggregate::new( task_id, @@ -27,6 +32,7 @@ impl Task { event_time: now, task_id, logical_plan, + metadata, }, ) .unwrap(), @@ -44,7 +50,7 @@ impl Task { /// Task is queued or running and cancellation was not already requested pub fn can_cancel(&self) -> bool { - matches!(self.status, TaskStatus::Queued | TaskStatus::Running if !self.cancellation_requested) + matches!(self.status(), TaskStatus::Queued | TaskStatus::Running if !self.cancellation_requested) } /// Set cancellation flag (if not already set) @@ -73,6 +79,14 @@ impl Task { }; self.apply(event) } + + pub fn requeue(&mut self, now: DateTime) -> Result<(), ProjectionError> { + let event = TaskEventRequeued { + event_time: now, + task_id: self.task_id, + }; + self.apply(event) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/entities/mod.rs b/src/domain/task-system/domain/src/entities/mod.rs index b19e129f2..05c426e98 100644 --- a/src/domain/task-system/domain/src/entities/mod.rs +++ b/src/domain/task-system/domain/src/entities/mod.rs @@ -10,11 +10,13 @@ mod logical_plan; mod task_event; mod task_id; +mod task_metadata; mod task_state; mod task_status; pub use logical_plan::*; pub use task_event::*; pub use task_id::*; +pub use task_metadata::*; pub use task_state::*; pub use task_status::*; diff --git a/src/domain/task-system/domain/src/entities/task_event.rs b/src/domain/task-system/domain/src/entities/task_event.rs index 3097e551c..f6f22d610 100644 --- 
a/src/domain/task-system/domain/src/entities/task_event.rs +++ b/src/domain/task-system/domain/src/entities/task_event.rs @@ -9,7 +9,6 @@ use chrono::{DateTime, Utc}; use enum_variants::*; -use opendatafabric::DatasetID; use serde::{Deserialize, Serialize}; use super::*; @@ -23,6 +22,8 @@ pub enum TaskEvent { TaskCreated(TaskEventCreated), /// Task execution had started TaskRunning(TaskEventRunning), + /// Task execution has re-queued (switched from Running back to Queued) + TaskRequeued(TaskEventRequeued), /// Cancellation of task was requested (this is not immediate and task may /// still finish with a different outcome than cancelled) TaskCancelled(TaskEventCancelled), @@ -37,6 +38,7 @@ pub struct TaskEventCreated { pub event_time: DateTime, pub task_id: TaskID, pub logical_plan: LogicalPlan, + pub metadata: Option, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -49,6 +51,14 @@ pub struct TaskEventRunning { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TaskEventRequeued { + pub event_time: DateTime, + pub task_id: TaskID, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct TaskEventCancelled { pub event_time: DateTime, @@ -71,6 +81,7 @@ impl TaskEvent { match self { TaskEvent::TaskCreated(_) => "TaskEventCreated", TaskEvent::TaskRunning(_) => "TaskEventRunning", + TaskEvent::TaskRequeued(_) => "TaskEventRequeued", TaskEvent::TaskCancelled(_) => "TaskEventCancelled", TaskEvent::TaskFinished(_) => "TaskEventFinished", } @@ -80,6 +91,7 @@ impl TaskEvent { match self { TaskEvent::TaskCreated(e) => e.task_id, TaskEvent::TaskRunning(e) => e.task_id, + TaskEvent::TaskRequeued(e) => e.task_id, TaskEvent::TaskCancelled(e) => e.task_id, TaskEvent::TaskFinished(e) => e.task_id, } @@ -89,16 +101,17 @@ impl TaskEvent { match self { TaskEvent::TaskCreated(e) => e.event_time, TaskEvent::TaskRunning(e) => e.event_time, + TaskEvent::TaskRequeued(e) => e.event_time, TaskEvent::TaskCancelled(e) => e.event_time, TaskEvent::TaskFinished(e) => e.event_time, } } - pub fn dataset_id(&self) -> Option<&DatasetID> { - if let TaskEvent::TaskCreated(created) = self { - created.logical_plan.dataset_id() - } else { - None + pub fn new_status(&self) -> TaskStatus { + match self { + TaskEvent::TaskCreated(_) | TaskEvent::TaskRequeued(_) => TaskStatus::Queued, + TaskEvent::TaskRunning(_) => TaskStatus::Running, + TaskEvent::TaskCancelled(_) | TaskEvent::TaskFinished(_) => TaskStatus::Finished, } } } @@ -107,6 +120,7 @@ impl TaskEvent { impl_enum_with_variants!(TaskEvent); impl_enum_variant!(TaskEvent::TaskCreated(TaskEventCreated)); impl_enum_variant!(TaskEvent::TaskRunning(TaskEventRunning)); +impl_enum_variant!(TaskEvent::TaskRequeued(TaskEventRequeued)); impl_enum_variant!(TaskEvent::TaskCancelled(TaskEventCancelled)); impl_enum_variant!(TaskEvent::TaskFinished(TaskEventFinished)); diff --git a/src/domain/task-system/domain/src/entities/task_id.rs b/src/domain/task-system/domain/src/entities/task_id.rs index 397772243..690ac5ae9 100644 --- a/src/domain/task-system/domain/src/entities/task_id.rs +++ b/src/domain/task-system/domain/src/entities/task_id.rs @@ -7,20 +7,31 @@ // the Business Source License, use of this software will be governed // 
by the Apache License, Version 2.0. +use std::num::TryFromIntError; + use serde::{Deserialize, Serialize}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Uniquely identifies a task within a compute node deployment -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct TaskID(i64); +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)] +pub struct TaskID(u64); impl TaskID { - pub fn new(id: i64) -> Self { + pub fn new(id: u64) -> Self { Self(id) } } +impl TryFrom for TaskID { + type Error = TryFromIntError; + + fn try_from(val: i64) -> Result { + let id: u64 = u64::try_from(val)?; + Ok(Self::new(id)) + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl std::fmt::Display for TaskID { @@ -29,8 +40,18 @@ impl std::fmt::Display for TaskID { } } -impl From for i64 { +impl From for u64 { fn from(val: TaskID) -> Self { val.0 } } + +impl TryFrom for i64 { + type Error = TryFromIntError; + + fn try_from(val: TaskID) -> Result { + i64::try_from(val.0) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/entities/task_metadata.rs b/src/domain/task-system/domain/src/entities/task_metadata.rs new file mode 100644 index 000000000..f3781affe --- /dev/null +++ b/src/domain/task-system/domain/src/entities/task_metadata.rs @@ -0,0 +1,45 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
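// Aside (illustrative sketch, not part of the patch): how the `TaskMetadata`
// key-value container defined just below can be used. The "flow_id"/"attempt"
// keys are made-up examples; the flow-system tests above use their own
// METADATA_TASK_FLOW_ID key to store the task->flow association.
fn task_metadata_usage_sketch() {
    use kamu_task_system::TaskMetadata;

    // Build from key-value pairs in one go...
    let mut metadata = TaskMetadata::from(vec![("flow_id", "42")]);
    // ...or set individual properties later
    metadata.set_property("attempt", "1");

    assert_eq!(metadata.try_get_property("flow_id"), Some("42".to_string()));
    assert_eq!(metadata.try_get_property("missing"), None);
}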
+ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct TaskMetadata { + properties: HashMap, +} + +impl TaskMetadata { + pub fn from, TValue: Into>( + key_value_pairs: Vec<(TKey, TValue)>, + ) -> Self { + let mut properties = HashMap::new(); + for (k, v) in key_value_pairs { + properties.insert(k.into(), v.into()); + } + Self { properties } + } + + pub fn set_property, TValue: Into>( + &mut self, + key: TKey, + value: TValue, + ) { + self.properties.insert(key.into(), value.into()); + } + + pub fn try_get_property(&self, key: &str) -> Option { + self.properties.get(key).cloned() + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/entities/task_state.rs b/src/domain/task-system/domain/src/entities/task_state.rs index 8f6fe0638..2b36aeaa9 100644 --- a/src/domain/task-system/domain/src/entities/task_state.rs +++ b/src/domain/task-system/domain/src/entities/task_state.rs @@ -19,12 +19,14 @@ use crate::*; pub struct TaskState { /// Unique and stable identifier of this task pub task_id: TaskID, - /// Life-cycle status of a task - pub status: TaskStatus, + /// Outcome of a task + pub outcome: Option, /// Whether the task was ordered to be cancelled pub cancellation_requested: bool, /// Execution plan of the task pub logical_plan: LogicalPlan, + /// Optional associated metadata + pub metadata: TaskMetadata, /// Time when task was originally created and placed in a queue pub created_at: DateTime, @@ -36,6 +38,19 @@ pub struct TaskState { pub finished_at: Option>, } +impl TaskState { + /// Computes status + pub fn status(&self) -> TaskStatus { + if self.outcome.is_some() { + TaskStatus::Finished + } else if self.ran_at.is_some() { + TaskStatus::Running + } else { + TaskStatus::Queued + } + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl Projection for TaskState { @@ -51,11 +66,13 @@ impl Projection for TaskState { event_time, task_id, logical_plan, + metadata, }) => Ok(Self { task_id, - status: TaskStatus::Queued, + outcome: None, cancellation_requested: false, logical_plan, + metadata: metadata.unwrap_or_default(), created_at: event_time, ran_at: None, cancellation_requested_at: None, @@ -67,19 +84,20 @@ impl Projection for TaskState { assert_eq!(s.task_id, event.task_id()); match event { - E::TaskCreated(_) => Err(ProjectionError::new(Some(s), event)), E::TaskRunning(TaskEventRunning { event_time, .. }) - if s.status == TaskStatus::Queued => + if s.status() == TaskStatus::Queued => { Ok(Self { - status: TaskStatus::Running, ran_at: Some(event_time), ..s }) } + E::TaskRequeued(_) if s.status() == TaskStatus::Running => { + Ok(Self { ran_at: None, ..s }) + } E::TaskCancelled(TaskEventCancelled { event_time, .. }) - if s.status == TaskStatus::Queued - || s.status == TaskStatus::Running && !s.cancellation_requested => + if s.status() == TaskStatus::Queued + || s.status() == TaskStatus::Running && !s.cancellation_requested => { Ok(Self { cancellation_requested: true, @@ -91,16 +109,18 @@ impl Projection for TaskState { event_time, outcome, .. 
- }) if s.status == TaskStatus::Queued || s.status == TaskStatus::Running => { + }) if s.status() == TaskStatus::Queued || s.status() == TaskStatus::Running => { Ok(Self { - status: TaskStatus::Finished(outcome), + outcome: Some(outcome), finished_at: Some(event_time), ..s }) } - E::TaskRunning(_) | E::TaskCancelled(_) | E::TaskFinished(_) => { - Err(ProjectionError::new(Some(s), event)) - } + E::TaskCreated(_) + | E::TaskRunning(_) + | E::TaskRequeued(_) + | E::TaskCancelled(_) + | E::TaskFinished(_) => Err(ProjectionError::new(Some(s), event)), } } } diff --git a/src/domain/task-system/domain/src/entities/task_status.rs b/src/domain/task-system/domain/src/entities/task_status.rs index b33db46c4..61ccdbdc1 100644 --- a/src/domain/task-system/domain/src/entities/task_status.rs +++ b/src/domain/task-system/domain/src/entities/task_status.rs @@ -13,14 +13,15 @@ use serde::{Deserialize, Serialize}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, sqlx::Type)] +#[sqlx(type_name = "task_status_type", rename_all = "snake_case")] pub enum TaskStatus { /// Task is waiting for capacity to be allocated to it Queued, /// Task is being executed Running, /// Task has reached a certain final outcome - Finished(TaskOutcome), + Finished, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/messages/task_messages_types.rs b/src/domain/task-system/domain/src/messages/task_messages_types.rs index 1675593d6..9e909a928 100644 --- a/src/domain/task-system/domain/src/messages/task_messages_types.rs +++ b/src/domain/task-system/domain/src/messages/task_messages_types.rs @@ -11,7 +11,7 @@ use chrono::{DateTime, Utc}; use messaging_outbox::Message; use serde::{Deserialize, Serialize}; -use crate::{TaskID, TaskOutcome}; +use crate::{TaskID, TaskMetadata, TaskOutcome}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -22,17 +22,28 @@ pub enum TaskProgressMessage { } impl TaskProgressMessage { - pub fn running(event_time: DateTime, task_id: TaskID) -> Self { + pub fn running( + event_time: DateTime, + task_id: TaskID, + task_metadata: TaskMetadata, + ) -> Self { Self::Running(TaskProgressMessageRunning { event_time, task_id, + task_metadata, }) } - pub fn finished(event_time: DateTime, task_id: TaskID, outcome: TaskOutcome) -> Self { + pub fn finished( + event_time: DateTime, + task_id: TaskID, + task_metadata: TaskMetadata, + outcome: TaskOutcome, + ) -> Self { Self::Finished(TaskProgressMessageFinished { event_time, task_id, + task_metadata, outcome, }) } @@ -46,6 +57,7 @@ impl Message for TaskProgressMessage {} pub struct TaskProgressMessageRunning { pub event_time: DateTime, pub task_id: TaskID, + pub task_metadata: TaskMetadata, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -54,6 +66,7 @@ pub struct TaskProgressMessageRunning { pub struct TaskProgressMessageFinished { pub event_time: DateTime, pub task_id: TaskID, + pub task_metadata: TaskMetadata, pub outcome: TaskOutcome, } diff --git a/src/domain/task-system/domain/src/repos/mod.rs b/src/domain/task-system/domain/src/repos/mod.rs index 5088bc43e..389e9e769 100644 --- a/src/domain/task-system/domain/src/repos/mod.rs +++ 
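// Aside (illustrative sketch, not part of the patch): with the task_state.rs and
// task_status.rs changes above, `TaskStatus` no longer carries the outcome; the
// status is derived from `outcome` / `ran_at` via `TaskState::status()`. All names
// below are taken from this diff.
fn derived_status_sketch() {
    use chrono::Utc;
    use kamu_task_system::*;

    let mut task = Task::new(Utc::now(), TaskID::new(1), Probe::default().into(), None);
    assert_eq!(task.status(), TaskStatus::Queued);
    assert_eq!(task.outcome, None);

    task.run(Utc::now()).unwrap();
    assert_eq!(task.status(), TaskStatus::Running);

    task.finish(Utc::now(), TaskOutcome::Success(TaskResult::Empty)).unwrap();
    assert_eq!(task.status(), TaskStatus::Finished);
    assert_eq!(task.outcome, Some(TaskOutcome::Success(TaskResult::Empty)));
}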
b/src/domain/task-system/domain/src/repos/mod.rs @@ -7,6 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -mod task_system_event_store; +mod task_event_store; -pub use task_system_event_store::*; +pub use task_event_store::*; diff --git a/src/domain/task-system/domain/src/repos/task_system_event_store.rs b/src/domain/task-system/domain/src/repos/task_event_store.rs similarity index 66% rename from src/domain/task-system/domain/src/repos/task_system_event_store.rs rename to src/domain/task-system/domain/src/repos/task_event_store.rs index 80d20ce3a..3abfb92f0 100644 --- a/src/domain/task-system/domain/src/repos/task_system_event_store.rs +++ b/src/domain/task-system/domain/src/repos/task_event_store.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use database_common::PaginationOpts; use event_sourcing::EventStore; use opendatafabric::DatasetID; @@ -15,19 +16,31 @@ use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait TaskSystemEventStore: EventStore { +pub trait TaskEventStore: EventStore { /// Generates new unique task identifier async fn new_task_id(&self) -> Result; + /// Attempts to get the earliest queued task, if any + async fn try_get_queued_task(&self) -> Result, InternalError>; + + /// Returns list of tasks, which are in Running state, + /// from earliest to latest + fn get_running_tasks(&self, pagination: PaginationOpts) -> TaskIDStream; + + /// Returns total number of tasks, which are in Running state + async fn get_count_running_tasks(&self) -> Result; + /// Returns page of the tasks associated with the specified dataset in /// reverse chronological order based on creation time - async fn get_tasks_by_dataset( + /// Note: no longer used, but might be used in future (admin view) + fn get_tasks_by_dataset( &self, dataset_id: &DatasetID, - pagination: TaskPaginationOpts, + pagination: PaginationOpts, ) -> TaskIDStream; /// Returns total number of tasks associated with the specified dataset + /// Note: no longer used, but might be used in future (admin view) async fn get_count_tasks_by_dataset( &self, dataset_id: &DatasetID, @@ -39,10 +52,4 @@ pub trait TaskSystemEventStore: EventStore { pub type TaskIDStream<'a> = std::pin::Pin> + Send + 'a>>; -#[derive(Debug)] -pub struct TaskPaginationOpts { - pub offset: usize, - pub limit: usize, -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/services/mod.rs b/src/domain/task-system/domain/src/services/mod.rs index a33fff599..0129d8a8c 100644 --- a/src/domain/task-system/domain/src/services/mod.rs +++ b/src/domain/task-system/domain/src/services/mod.rs @@ -8,7 +8,9 @@ // by the Apache License, Version 2.0. 
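// Aside (illustrative sketch, not part of the patch): paging through Running tasks
// with the new `TaskEventStore` queries. The return types (a `usize` count, a
// stream of `Result<TaskID, InternalError>`) are reconstructed from how the
// executor consumes them further below.
async fn collect_running_task_ids(
    task_event_store: &dyn kamu_task_system::TaskEventStore,
) -> Result<Vec<kamu_task_system::TaskID>, internal_error::InternalError> {
    use database_common::PaginationOpts;
    use futures::TryStreamExt;

    let total = task_event_store.get_count_running_tasks().await?;
    let mut task_ids = Vec::new();
    while task_ids.len() < total {
        let page: Vec<_> = task_event_store
            .get_running_tasks(PaginationOpts {
                offset: task_ids.len(),
                limit: 100,
            })
            .try_collect()
            .await?;
        if page.is_empty() {
            break; // defensive: the count and the stream disagree
        }
        task_ids.extend(page);
    }
    Ok(task_ids)
}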
mod task_executor; +mod task_logical_plan_runner; mod task_scheduler; pub use task_executor::*; +pub use task_logical_plan_runner::*; pub use task_scheduler::*; diff --git a/src/domain/task-system/domain/src/services/task_executor.rs b/src/domain/task-system/domain/src/services/task_executor.rs index 3fd3b7acf..b63588928 100644 --- a/src/domain/task-system/domain/src/services/task_executor.rs +++ b/src/domain/task-system/domain/src/services/task_executor.rs @@ -9,8 +9,18 @@ use crate::*; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[async_trait::async_trait] pub trait TaskExecutor: Sync + Send { + /// Runs initialization phase + async fn pre_run(&self) -> Result<(), InternalError>; + /// Runs the executor main loop async fn run(&self) -> Result<(), InternalError>; + + /// Runs single task only, blocks until it is available (for tests only!) + async fn run_single_task(&self) -> Result<(), InternalError>; } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/services/task_logical_plan_runner.rs b/src/domain/task-system/domain/src/services/task_logical_plan_runner.rs new file mode 100644 index 000000000..3a3993502 --- /dev/null +++ b/src/domain/task-system/domain/src/services/task_logical_plan_runner.rs @@ -0,0 +1,21 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
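// Aside (illustrative sketch, not part of the patch): the startup order implied by
// splitting one-shot initialization out of the main loop. The `InternalError`
// return types are reconstructed from the executor implementation further below.
async fn start_task_executor(
    executor: std::sync::Arc<dyn kamu_task_system::TaskExecutor>,
) -> Result<(), internal_error::InternalError> {
    // One-shot initialization, e.g. re-queueing tasks interrupted by a restart
    executor.pre_run().await?;
    // Main loop; under normal operation this does not return
    executor.run().await
}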
+ +use event_sourcing::InternalError; + +use crate::{LogicalPlan, TaskOutcome}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait TaskLogicalPlanRunner: Send + Sync { + async fn run_plan(&self, logical_plan: &LogicalPlan) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/services/task_scheduler.rs b/src/domain/task-system/domain/src/services/task_scheduler.rs index dbbe38f6a..1a26be269 100644 --- a/src/domain/task-system/domain/src/services/task_scheduler.rs +++ b/src/domain/task-system/domain/src/services/task_scheduler.rs @@ -9,7 +9,6 @@ use event_sourcing::LoadError; use kamu_core::DatasetNotFoundError; -use opendatafabric::DatasetID; use tokio_stream::Stream; use crate::*; @@ -18,16 +17,12 @@ use crate::*; #[async_trait::async_trait] pub trait TaskScheduler: Sync + Send { - /// Creates a new task from provided logical plan - async fn create_task(&self, plan: LogicalPlan) -> Result; - - /// Returns page of states of tasks associated with a given dataset ordered - /// by creation time from newest to oldest - async fn list_tasks_by_dataset( + /// Creates a new task from provided logical plan & metadata + async fn create_task( &self, - dataset_id: &DatasetID, - pagination: TaskPaginationOpts, - ) -> Result; + plan: LogicalPlan, + metadata: Option, + ) -> Result; /// Returns current state of a given task async fn get_task(&self, task_id: TaskID) -> Result; @@ -35,12 +30,8 @@ pub trait TaskScheduler: Sync + Send { /// Attempts to cancel the given task async fn cancel_task(&self, task_id: TaskID) -> Result; - /// Blocks until the next task is available for execution and takes it out - /// of the queue (called by [TaskExecutor]) - async fn take(&self) -> Result; - - /// A non-blocking version of [TaskScheduler::take()] - async fn try_take(&self) -> Result, TakeTaskError>; + /// Takes the earliest available queued task, if any, without blocking + async fn try_take(&self) -> Result, TakeTaskError>; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/Cargo.toml b/src/domain/task-system/services/Cargo.toml index a120cb25a..fe49d8cc4 100644 --- a/src/domain/task-system/services/Cargo.toml +++ b/src/domain/task-system/services/Cargo.toml @@ -43,4 +43,5 @@ tracing = { version = "0.1", default-features = false } kamu-task-system-inmem = { workspace = true } chrono = { version = "0.4", default-features = false } +mockall = "0.11" test-log = { version = "0.2", features = ["trace"] } diff --git a/src/domain/task-system/services/src/dependencies.rs b/src/domain/task-system/services/src/dependencies.rs new file mode 100644 index 000000000..e39492404 --- /dev/null +++ b/src/domain/task-system/services/src/dependencies.rs @@ -0,0 +1,22 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
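// Aside (illustrative sketch, not part of the patch): scheduling a task with
// attached metadata through the revised `TaskScheduler` API. The exact error type
// of `create_task` is not visible in this excerpt, so error handling is elided
// with `unwrap()`; the "flow_id" key is a made-up example.
async fn schedule_probe_with_metadata(
    task_scheduler: &dyn kamu_task_system::TaskScheduler,
) -> kamu_task_system::TaskID {
    use kamu_task_system::*;

    // The metadata is persisted with the task and echoed back in the
    // TaskProgressMessage::running / ::finished notifications.
    let metadata = TaskMetadata::from(vec![("flow_id", "42")]);
    let task_state = task_scheduler
        .create_task(Probe::default().into(), Some(metadata))
        .await
        .unwrap();
    task_state.task_id
}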
+ +use dill::CatalogBuilder; + +use crate::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn register_dependencies(catalog_builder: &mut CatalogBuilder) { + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/src/lib.rs b/src/domain/task-system/services/src/lib.rs index 4d660b93a..6f22e3a9f 100644 --- a/src/domain/task-system/services/src/lib.rs +++ b/src/domain/task-system/services/src/lib.rs @@ -10,8 +10,12 @@ // Re-exports pub use kamu_task_system as domain; +mod dependencies; mod task_executor_impl; +mod task_logical_plan_runner_impl; mod task_scheduler_impl; +pub use dependencies::*; pub use task_executor_impl::*; +pub use task_logical_plan_runner_impl::*; pub use task_scheduler_impl::*; diff --git a/src/domain/task-system/services/src/task_executor_impl.rs b/src/domain/task-system/services/src/task_executor_impl.rs index bb3e83f5b..75a0c26a2 100644 --- a/src/domain/task-system/services/src/task_executor_impl.rs +++ b/src/domain/task-system/services/src/task_executor_impl.rs @@ -7,24 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use std::collections::HashMap; use std::sync::Arc; -use database_common::DatabaseTransactionRunner; +use database_common::{DatabaseTransactionRunner, PaginationOpts}; use dill::*; -use kamu_core::{ - CompactionOptions, - CompactionService, - DatasetRepository, - PollingIngestOptions, - PullError, - PullOptions, - PullService, - ResetError, - ResetService, - TransformError, -}; -use kamu_datasets::{DatasetEnvVar, DatasetEnvVarService}; use kamu_task_system::*; use messaging_outbox::{Outbox, OutboxExt}; use time_source::SystemTimeSource; @@ -33,7 +19,7 @@ use time_source::SystemTimeSource; pub struct TaskExecutorImpl { catalog: Catalog, - task_sched: Arc, + task_logical_plan_runner: Arc, time_source: Arc, } @@ -45,64 +31,110 @@ pub struct TaskExecutorImpl { impl TaskExecutorImpl { pub fn new( catalog: Catalog, - task_sched: Arc, + task_logical_plan_runner: Arc, time_source: Arc, ) -> Self { Self { catalog, - task_sched, + task_logical_plan_runner, time_source, } } - async fn take_task(&self) -> Result { - let task_id = self.task_sched.take().await.int_err()?; + async fn run_task_iteration(&self) -> Result<(), InternalError> { + let task = self.take_task().await?; + let task_outcome = self.run_task(&task).await?; + self.process_task_outcome(task, task_outcome).await?; + Ok(()) + } + async fn recover_running_tasks(&self) -> Result<(), InternalError> { + // Recovering tasks means we are re-queing tasks that started running, but got + // aborted due to server shutdown or crash DatabaseTransactionRunner::new(self.catalog.clone()) - .transactional_with2( - |event_store: Arc, outbox: Arc| async move { - let task = Task::load(task_id, event_store.as_ref()).await.int_err()?; + .transactional(|target_catalog: Catalog| async move { + let task_event_store = target_catalog.get_one::().unwrap(); + + // Total number of running tasks + let total_running_tasks = task_event_store.get_count_running_tasks().await?; + + // Processe them in pages + let mut processed_running_tasks = 0; + while processed_running_tasks < total_running_tasks { + // Load another page + use futures::TryStreamExt; + let running_task_ids: Vec<_> = 
task_event_store + .get_running_tasks(PaginationOpts { + offset: processed_running_tasks, + limit: 100, + }) + .try_collect() + .await?; + + for running_task_id in &running_task_ids { + // TODO: batch loading of tasks + let mut task = Task::load(*running_task_id, task_event_store.as_ref()) + .await + .int_err()?; + + // Requeue + task.requeue(self.time_source.now()).int_err()?; + task.save(task_event_store.as_ref()).await.int_err()?; + } + + processed_running_tasks += running_task_ids.len(); + } + + Ok(()) + }) + .await + } + + async fn take_task(&self) -> Result { + loop { + let maybe_task = DatabaseTransactionRunner::new(self.catalog.clone()) + .transactional(|target_catalog: Catalog| async move { + let task_scheduler = target_catalog.get_one::().unwrap(); + let maybe_task = task_scheduler.try_take().await.int_err()?; + let Some(task) = maybe_task else { + return Ok(None); + }; tracing::info!( - %task_id, + task_id = %task.task_id, logical_plan = ?task.logical_plan, "Executing task", ); + let outbox = target_catalog.get_one::().unwrap(); outbox .post_message( MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, - TaskProgressMessage::running(self.time_source.now(), task_id), + TaskProgressMessage::running( + self.time_source.now(), + task.task_id, + task.metadata.clone(), + ), ) .await?; - Ok(task) - }, - ) - .await - } + Ok(Some(task)) + }) + .await?; - async fn execute_task(&self, task: &Task) -> Result { - let task_outcome = match &task.logical_plan { - LogicalPlan::UpdateDataset(upd) => self.update_dataset_logical_plan(upd).await?, - LogicalPlan::Probe(Probe { - busy_time, - end_with_outcome, - .. - }) => { - if let Some(busy_time) = busy_time { - tokio::time::sleep(*busy_time).await; - } - end_with_outcome - .clone() - .unwrap_or(TaskOutcome::Success(TaskResult::Empty)) - } - LogicalPlan::Reset(reset_args) => self.reset_dataset_logical_plan(reset_args).await?, - LogicalPlan::HardCompactionDataset(hard_compaction_args) => { - self.hard_compaction_logical_plan(hard_compaction_args) - .await? + if let Some(task) = maybe_task { + return Ok(task); } - }; + + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + } + + async fn run_task(&self, task: &Task) -> Result { + let task_outcome = self + .task_logical_plan_runner + .run_plan(&task.logical_plan) + .await?; tracing::info!( task_id = %task.task_id, @@ -121,7 +153,7 @@ impl TaskExecutorImpl { ) -> Result<(), InternalError> { DatabaseTransactionRunner::new(self.catalog.clone()) .transactional_with2( - |event_store: Arc, outbox: Arc| async move { + |event_store: Arc, outbox: Arc| async move { // Refresh the task in case it was updated concurrently (e.g. 
late cancellation) task.update(event_store.as_ref()).await.int_err()?; task.finish(self.time_source.now(), task_outcome.clone()) @@ -134,6 +166,7 @@ impl TaskExecutorImpl { TaskProgressMessage::finished( self.time_source.now(), task.task_id, + task.metadata.clone(), task_outcome, ), ) @@ -144,139 +177,31 @@ impl TaskExecutorImpl { Ok(()) } - - async fn update_dataset_logical_plan( - &self, - update_dataset_args: &UpdateDataset, - ) -> Result { - let dataset_env_vars = DatabaseTransactionRunner::new(self.catalog.clone()) - .transactional_with( - |dataset_env_vars_svc: Arc| async move { - let dataset_env_vars = dataset_env_vars_svc - .get_all_dataset_env_vars_by_dataset_id( - &update_dataset_args.dataset_id, - None, - ) - .await - .int_err()?; - Ok(dataset_env_vars.list) - }, - ) - .await?; - let dataset_env_vars_hash_map = dataset_env_vars - .into_iter() - .map(|dataset_env_var| (dataset_env_var.key.clone(), dataset_env_var)) - .collect::>(); - let pull_options = PullOptions { - ingest_options: PollingIngestOptions { - dataset_env_vars: dataset_env_vars_hash_map, - fetch_uncacheable: update_dataset_args.fetch_uncacheable, - ..Default::default() - }, - ..Default::default() - }; - - let pull_svc = self.catalog.get_one::().int_err()?; - let maybe_pull_result = pull_svc - .pull( - &update_dataset_args.dataset_id.as_any_ref(), - pull_options, - None, - ) - .await; - - match maybe_pull_result { - Ok(pull_result) => Ok(TaskOutcome::Success(TaskResult::UpdateDatasetResult( - TaskUpdateDatasetResult { pull_result }, - ))), - Err(err) => match err { - PullError::TransformError(TransformError::InvalidInterval(_)) => { - Ok(TaskOutcome::Failed(TaskError::UpdateDatasetError( - UpdateDatasetTaskError::RootDatasetCompacted(RootDatasetCompactedError { - dataset_id: update_dataset_args.dataset_id.clone(), - }), - ))) - } - _ => Ok(TaskOutcome::Failed(TaskError::Empty)), - }, - } - } - - async fn reset_dataset_logical_plan( - &self, - reset_dataset_args: &ResetDataset, - ) -> Result { - let reset_svc = self.catalog.get_one::().int_err()?; - let dataset_repo = self.catalog.get_one::().int_err()?; - let dataset_handle = dataset_repo - .resolve_dataset_ref(&reset_dataset_args.dataset_id.as_local_ref()) - .await - .int_err()?; - - let reset_result_maybe = reset_svc - .reset_dataset( - &dataset_handle, - reset_dataset_args.new_head_hash.as_ref(), - reset_dataset_args.old_head_hash.as_ref(), - ) - .await; - match reset_result_maybe { - Ok(new_head) => Ok(TaskOutcome::Success(TaskResult::ResetDatasetResult( - TaskResetDatasetResult { new_head }, - ))), - Err(err) => match err { - ResetError::BlockNotFound(_) => Ok(TaskOutcome::Failed( - TaskError::ResetDatasetError(ResetDatasetTaskError::ResetHeadNotFound), - )), - _ => Ok(TaskOutcome::Failed(TaskError::Empty)), - }, - } - } - - async fn hard_compaction_logical_plan( - &self, - hard_compaction_args: &HardCompactionDataset, - ) -> Result { - let compaction_svc = self.catalog.get_one::().int_err()?; - let dataset_repo = self.catalog.get_one::().int_err()?; - let dataset_handle = dataset_repo - .resolve_dataset_ref(&hard_compaction_args.dataset_id.as_local_ref()) - .await - .int_err()?; - - let compaction_result = compaction_svc - .compact_dataset( - &dataset_handle, - CompactionOptions { - max_slice_size: hard_compaction_args.max_slice_size, - max_slice_records: hard_compaction_args.max_slice_records, - keep_metadata_only: hard_compaction_args.keep_metadata_only, - }, - None, - ) - .await; - - match compaction_result { - Ok(result) => 
Ok(TaskOutcome::Success(TaskResult::CompactionDatasetResult( - result.into(), - ))), - Err(_) => Ok(TaskOutcome::Failed(TaskError::Empty)), - } - } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] impl TaskExecutor for TaskExecutorImpl { + #[tracing::instrument(level = "info", skip_all)] + async fn pre_run(&self) -> Result<(), InternalError> { + self.recover_running_tasks().await?; + Ok(()) + } + // TODO: Error and panic handling strategy + #[tracing::instrument(level = "info", skip_all)] async fn run(&self) -> Result<(), InternalError> { loop { - let task = self.take_task().await?; - let task_outcome = self.execute_task(&task).await?; - self.process_task_outcome(task, task_outcome).await?; + self.run_task_iteration().await?; } } + + /// Runs single task only, blocks until it is available (for tests only!) + #[tracing::instrument(level = "info", skip_all)] + async fn run_single_task(&self) -> Result<(), InternalError> { + self.run_task_iteration().await + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/src/task_logical_plan_runner_impl.rs b/src/domain/task-system/services/src/task_logical_plan_runner_impl.rs new file mode 100644 index 000000000..53054c387 --- /dev/null +++ b/src/domain/task-system/services/src/task_logical_plan_runner_impl.rs @@ -0,0 +1,181 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
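// Aside (illustrative sketch, not part of the patch): how a test might drive
// exactly one scheduling/execution round with the new `run_single_task()`.
// Assumes a `dill::Catalog` assembled elsewhere with the task-system services,
// an event store and the database plumbing the executor needs; error handling
// is elided with `unwrap()`.
async fn run_one_round_sketch(catalog: &dill::Catalog) {
    use kamu_task_system::*;

    let task_scheduler = catalog.get_one::<dyn TaskScheduler>().unwrap();
    let task_executor = catalog.get_one::<dyn TaskExecutor>().unwrap();

    // Queue a trivial probe task...
    let task_state = task_scheduler
        .create_task(Probe::default().into(), None)
        .await
        .unwrap();

    // ...let the executor take it, run the plan and record the outcome...
    task_executor.run_single_task().await.unwrap();

    // ...then observe the final state through the scheduler
    let task_state = task_scheduler.get_task(task_state.task_id).await.unwrap();
    assert_eq!(task_state.status(), TaskStatus::Finished);
}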
+ +use std::collections::HashMap; +use std::sync::Arc; + +use database_common::DatabaseTransactionRunner; +use dill::*; +use internal_error::InternalError; +use kamu_core::{ + CompactionOptions, + CompactionService, + DatasetRepository, + PollingIngestOptions, + PullError, + PullOptions, + PullService, + ResetError, + ResetService, + TransformError, +}; +use kamu_datasets::{DatasetEnvVar, DatasetEnvVarService}; +use kamu_task_system::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct TaskLogicalPlanRunnerImpl { + catalog: Catalog, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn TaskLogicalPlanRunner)] +impl TaskLogicalPlanRunnerImpl { + pub fn new(catalog: Catalog) -> Self { + Self { catalog } + } + + async fn run_probe(&self, probe_plan: &Probe) -> Result { + if let Some(busy_time) = &probe_plan.busy_time { + tokio::time::sleep(*busy_time).await; + } + Ok(probe_plan + .end_with_outcome + .clone() + .unwrap_or(TaskOutcome::Success(TaskResult::Empty))) + } + + async fn run_update(&self, args: &UpdateDataset) -> Result { + let dataset_env_vars = DatabaseTransactionRunner::new(self.catalog.clone()) + .transactional_with( + |dataset_env_vars_svc: Arc| async move { + let dataset_env_vars = dataset_env_vars_svc + .get_all_dataset_env_vars_by_dataset_id(&args.dataset_id, None) + .await + .int_err()?; + Ok(dataset_env_vars.list) + }, + ) + .await?; + let dataset_env_vars_hash_map = dataset_env_vars + .into_iter() + .map(|dataset_env_var| (dataset_env_var.key.clone(), dataset_env_var)) + .collect::>(); + let pull_options = PullOptions { + ingest_options: PollingIngestOptions { + dataset_env_vars: dataset_env_vars_hash_map, + fetch_uncacheable: args.fetch_uncacheable, + ..Default::default() + }, + ..Default::default() + }; + + let pull_svc = self.catalog.get_one::().int_err()?; + let maybe_pull_result = pull_svc + .pull(&args.dataset_id.as_any_ref(), pull_options, None) + .await; + + match maybe_pull_result { + Ok(pull_result) => Ok(TaskOutcome::Success(TaskResult::UpdateDatasetResult( + TaskUpdateDatasetResult { pull_result }, + ))), + Err(err) => match err { + PullError::TransformError(TransformError::InvalidInterval(_)) => { + Ok(TaskOutcome::Failed(TaskError::UpdateDatasetError( + UpdateDatasetTaskError::RootDatasetCompacted(RootDatasetCompactedError { + dataset_id: args.dataset_id.clone(), + }), + ))) + } + _ => Ok(TaskOutcome::Failed(TaskError::Empty)), + }, + } + } + + async fn run_reset(&self, args: &ResetDataset) -> Result { + let reset_svc = self.catalog.get_one::().int_err()?; + let dataset_repo = self.catalog.get_one::().int_err()?; + let dataset_handle = dataset_repo + .resolve_dataset_ref(&args.dataset_id.as_local_ref()) + .await + .int_err()?; + + let reset_result_maybe = reset_svc + .reset_dataset( + &dataset_handle, + args.new_head_hash.as_ref(), + args.old_head_hash.as_ref(), + ) + .await; + match reset_result_maybe { + Ok(new_head) => Ok(TaskOutcome::Success(TaskResult::ResetDatasetResult( + TaskResetDatasetResult { new_head }, + ))), + Err(err) => match err { + ResetError::BlockNotFound(_) => Ok(TaskOutcome::Failed( + TaskError::ResetDatasetError(ResetDatasetTaskError::ResetHeadNotFound), + )), + _ => Ok(TaskOutcome::Failed(TaskError::Empty)), + }, + } + } + + async fn run_hard_compaction( + &self, + args: &HardCompactionDataset, + ) -> Result { + let compaction_svc = 
self.catalog.get_one::().int_err()?; + let dataset_repo = self.catalog.get_one::().int_err()?; + let dataset_handle = dataset_repo + .resolve_dataset_ref(&args.dataset_id.as_local_ref()) + .await + .int_err()?; + + let compaction_result = compaction_svc + .compact_dataset( + &dataset_handle, + CompactionOptions { + max_slice_size: args.max_slice_size, + max_slice_records: args.max_slice_records, + keep_metadata_only: args.keep_metadata_only, + }, + None, + ) + .await; + + match compaction_result { + Ok(result) => Ok(TaskOutcome::Success(TaskResult::CompactionDatasetResult( + result.into(), + ))), + Err(_) => Ok(TaskOutcome::Failed(TaskError::Empty)), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl TaskLogicalPlanRunner for TaskLogicalPlanRunnerImpl { + async fn run_plan(&self, logical_plan: &LogicalPlan) -> Result { + let task_outcome = match logical_plan { + LogicalPlan::UpdateDataset(upd) => self.run_update(upd).await?, + LogicalPlan::Probe(probe) => self.run_probe(probe).await?, + LogicalPlan::Reset(reset) => self.run_reset(reset).await?, + LogicalPlan::HardCompactionDataset(compaction) => { + self.run_hard_compaction(compaction).await? + } + }; + + Ok(task_outcome) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/src/task_scheduler_impl.rs b/src/domain/task-system/services/src/task_scheduler_impl.rs index 9e3ca38e7..7e6e50c3b 100644 --- a/src/domain/task-system/services/src/task_scheduler_impl.rs +++ b/src/domain/task-system/services/src/task_scheduler_impl.rs @@ -7,42 +7,30 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
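// Aside (illustrative sketch, not part of the patch): the `TaskLogicalPlanRunner`
// trait can be swapped out in tests (the new `mockall` dev-dependency hints at
// that); a hand-written stub, with the signature reconstructed from the
// implementation above:
struct AlwaysSucceedsPlanRunner;

#[async_trait::async_trait]
impl kamu_task_system::TaskLogicalPlanRunner for AlwaysSucceedsPlanRunner {
    async fn run_plan(
        &self,
        _logical_plan: &kamu_task_system::LogicalPlan,
    ) -> Result<kamu_task_system::TaskOutcome, internal_error::InternalError> {
        // Pretend every plan ran successfully, without touching any datasets
        Ok(kamu_task_system::TaskOutcome::Success(
            kamu_task_system::TaskResult::Empty,
        ))
    }
}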
-use std::collections::VecDeque; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use dill::*; use kamu_task_system::*; -use opendatafabric::DatasetID; use time_source::SystemTimeSource; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct TaskSchedulerImpl { - state: Arc>, - // TODO: EventStore is transaction-dependent, it can't be instantiated in a singleton - event_store: Arc, + task_event_store: Arc, time_source: Arc, } -#[derive(Default)] -struct State { - // TODO: store in DB or something like Redis - task_queue: VecDeque, -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[component(pub)] #[interface(dyn TaskScheduler)] -#[scope(Singleton)] impl TaskSchedulerImpl { pub fn new( - event_store: Arc, + task_event_store: Arc, time_source: Arc, ) -> Self { Self { - state: Arc::new(Mutex::new(State::default())), - event_store, + task_event_store, time_source, } } @@ -53,113 +41,64 @@ impl TaskSchedulerImpl { #[async_trait::async_trait] impl TaskScheduler for TaskSchedulerImpl { #[tracing::instrument(level = "info", skip_all, fields(?logical_plan))] - async fn create_task(&self, logical_plan: LogicalPlan) -> Result { + async fn create_task( + &self, + logical_plan: LogicalPlan, + metadata: Option, + ) -> Result { let mut task = Task::new( self.time_source.now(), - self.event_store.new_task_id().await?, + self.task_event_store.new_task_id().await?, logical_plan, + metadata, ); - task.save(self.event_store.as_ref()).await.int_err()?; - - let queue_len = { - let mut state = self.state.lock().unwrap(); - state.task_queue.push_back(task.task_id); - state.task_queue.len() - }; + task.save(self.task_event_store.as_ref()).await.int_err()?; tracing::info!( task_id = %task.task_id, - queue_len, "Task queued" ); Ok(task.into()) } - #[tracing::instrument(level = "info", skip_all, fields(%task_id))] + #[tracing::instrument(level = "debug", skip_all, fields(%task_id))] async fn get_task(&self, task_id: TaskID) -> Result { - let task = Task::load(task_id, self.event_store.as_ref()).await?; + let task = Task::load(task_id, self.task_event_store.as_ref()).await?; Ok(task.into()) } #[tracing::instrument(level = "info", skip_all, fields(%task_id))] async fn cancel_task(&self, task_id: TaskID) -> Result { - let mut task = Task::load(task_id, self.event_store.as_ref()).await?; + let mut task = Task::load(task_id, self.task_event_store.as_ref()).await?; if task.can_cancel() { task.cancel(self.time_source.now()).int_err()?; - task.save(self.event_store.as_ref()).await.int_err()?; - - let mut state = self.state.lock().unwrap(); - state.task_queue.retain(|task_id| *task_id != task.task_id); + task.save(self.task_event_store.as_ref()).await.int_err()?; } Ok(task.into()) } - #[tracing::instrument(level = "info", skip_all, fields(%dataset_id))] - async fn list_tasks_by_dataset( - &self, - dataset_id: &DatasetID, - pagination: TaskPaginationOpts, - ) -> Result { - let total_count = self - .event_store - .get_count_tasks_by_dataset(dataset_id) - .await?; - - let dataset_id = dataset_id.clone(); - - use futures::TryStreamExt; - let stream = Box::pin(async_stream::stream! 
{ - let relevant_task_ids: Vec<_> = self - .event_store - .get_tasks_by_dataset(&dataset_id, pagination) - .await - .try_collect() - .await - .int_err()?; - - // TODO: implement batch loading - for task_id in relevant_task_ids { - let task = Task::load(task_id, self.event_store.as_ref()).await.int_err()?; - yield Ok(task.into()); - } - }); - - Ok(TaskStateListing { - stream, - total_count, - }) - } - - // TODO: Use signaling instead of a loop - async fn take(&self) -> Result { - loop { - if let Some(task_id) = self.try_take().await? { - return Ok(task_id); - } - - tokio::time::sleep(std::time::Duration::from_secs(1)).await; - } - } - - // TODO: How to prevent tasks from being lost if executor crashes - async fn try_take(&self) -> Result, TakeTaskError> { - let task_id = { - let mut s = self.state.lock().unwrap(); - s.task_queue.pop_front() - }; - - let Some(task_id) = task_id else { + #[tracing::instrument(level = "debug", skip_all)] + async fn try_take(&self) -> Result, TakeTaskError> { + // Try reading just 1 earliest queued task + let Some(task_id) = self + .task_event_store + .try_get_queued_task() + .await + .map_err(TakeTaskError::Internal)? + else { + // No queued tasks yet.. return Ok(None); }; - let mut task = Task::load(task_id, self.event_store.as_ref()) + // Mark the task as running and hand it over to Executor + let mut task = Task::load(task_id, self.task_event_store.as_ref()) .await .int_err()?; task.run(self.time_source.now()).int_err()?; - task.save(self.event_store.as_ref()).await.int_err()?; + task.save(self.task_event_store.as_ref()).await.int_err()?; tracing::info!( %task_id, @@ -167,7 +106,7 @@ impl TaskScheduler for TaskSchedulerImpl { "Handing over a task to an executor", ); - Ok(Some(task_id)) + Ok(Some(task)) } } diff --git a/src/domain/task-system/services/tests/tests/mod.rs b/src/domain/task-system/services/tests/tests/mod.rs index 3a34745d4..7b40bdb45 100644 --- a/src/domain/task-system/services/tests/tests/mod.rs +++ b/src/domain/task-system/services/tests/tests/mod.rs @@ -8,4 +8,6 @@ // by the Apache License, Version 2.0. 
mod test_task_aggregate; + +mod test_task_executor_impl; mod test_task_scheduler_impl; diff --git a/src/domain/task-system/services/tests/tests/test_task_aggregate.rs b/src/domain/task-system/services/tests/tests/test_task_aggregate.rs index 3e60b1065..f640ded9b 100644 --- a/src/domain/task-system/services/tests/tests/test_task_aggregate.rs +++ b/src/domain/task-system/services/tests/tests/test_task_aggregate.rs @@ -17,12 +17,15 @@ use kamu_task_system_services::domain::*; #[test_log::test(tokio::test)] async fn test_task_agg_create_new() { - let event_store = InMemoryTaskSystemEventStore::new(); + let event_store = InMemoryTaskEventStore::new(); + + let metadata = TaskMetadata::from(vec![("foo", "x"), ("bar", "y")]); let mut task = Task::new( Utc::now(), event_store.new_task_id().await.unwrap(), Probe::default().into(), + Some(metadata.clone()), ); assert_eq!(event_store.len().await.unwrap(), 0); @@ -34,18 +37,19 @@ async fn test_task_agg_create_new() { assert_eq!(event_store.len().await.unwrap(), 1); let task = Task::load(task.task_id, &event_store).await.unwrap(); - assert_eq!(task.status, TaskStatus::Queued); + assert_eq!(task.status(), TaskStatus::Queued); assert_eq!(task.logical_plan, LogicalPlan::Probe(Probe::default())); + assert_eq!(task.metadata, metadata); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] async fn test_task_save_load_update() { - let event_store = InMemoryTaskSystemEventStore::new(); + let event_store = InMemoryTaskEventStore::new(); let task_id = event_store.new_task_id().await.unwrap(); - let mut task = Task::new(Utc::now(), task_id, Probe::default().into()); + let mut task = Task::new(Utc::now(), task_id, Probe::default().into(), None); task.save(&event_store).await.unwrap(); task.run(Utc::now()).unwrap(); @@ -62,7 +66,8 @@ async fn test_task_save_load_update() { // Full load let task = Task::load(task_id, &event_store).await.unwrap(); - assert_eq!(task.status, TaskStatus::Finished(TaskOutcome::Cancelled)); + assert_eq!(task.status(), TaskStatus::Finished); + assert_eq!(task.outcome, Some(TaskOutcome::Cancelled)); // Partial load let mut task = Task::load_ext( @@ -74,24 +79,26 @@ async fn test_task_save_load_update() { ) .await .unwrap(); - assert_eq!(task.status, TaskStatus::Running); + assert_eq!(task.status(), TaskStatus::Running); assert!(task.cancellation_requested); // Update task.update(&event_store).await.unwrap(); - assert_eq!(task.status, TaskStatus::Finished(TaskOutcome::Cancelled)); + assert_eq!(task.status(), TaskStatus::Finished); + assert_eq!(task.outcome, Some(TaskOutcome::Cancelled)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] async fn test_task_agg_illegal_transition() { - let event_store = InMemoryTaskSystemEventStore::new(); + let event_store = InMemoryTaskEventStore::new(); let mut task = Task::new( Utc::now(), event_store.new_task_id().await.unwrap(), Probe::default().into(), + None, ); task.finish(Utc::now(), TaskOutcome::Cancelled).unwrap(); @@ -99,3 +106,24 @@ async fn test_task_agg_illegal_transition() { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_task_requeue() { + let event_store = InMemoryTaskEventStore::new(); + + let mut task = Task::new( + Utc::now(), + 
event_store.new_task_id().await.unwrap(), + Probe::default().into(), + None, + ); + task.run(Utc::now()).unwrap(); + task.save(&event_store).await.unwrap(); + assert_eq!(task.status(), TaskStatus::Running); + + task.requeue(Utc::now()).unwrap(); + task.save(&event_store).await.unwrap(); + assert_eq!(task.status(), TaskStatus::Queued); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/tests/tests/test_task_executor_impl.rs b/src/domain/task-system/services/tests/tests/test_task_executor_impl.rs new file mode 100644 index 000000000..cf77511f9 --- /dev/null +++ b/src/domain/task-system/services/tests/tests/test_task_executor_impl.rs @@ -0,0 +1,243 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::assert_matches::assert_matches; +use std::sync::Arc; + +use database_common::NoOpDatabasePlugin; +use dill::{Catalog, CatalogBuilder}; +use kamu_task_system::*; +use kamu_task_system_inmem::InMemoryTaskEventStore; +use kamu_task_system_services::*; +use messaging_outbox::{MockOutbox, Outbox}; +use mockall::predicate::{eq, function}; +use time_source::SystemTimeSourceDefault; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_pre_run_requeues_running_tasks() { + let harness = TaskExecutorHarness::new(MockOutbox::new(), MockTaskLogicalPlanRunner::new()); + + // Schedule 3 tasks + let task_id_1 = harness.schedule_probe_task().await; + let task_id_2 = harness.schedule_probe_task().await; + let task_id_3 = harness.schedule_probe_task().await; + + // Make 2 of 3 Running + let task_1 = harness.try_take_task().await; + let task_2 = harness.try_take_task().await; + assert_matches!(task_1, Some(t) if t.task_id == task_id_1); + assert_matches!(task_2, Some(t) if t.task_id == task_id_2); + + // 1, 2 Running while 3 should be Queued + let task_1 = harness.get_task(task_id_1).await; + let task_2 = harness.get_task(task_id_2).await; + let task_3 = harness.get_task(task_id_3).await; + assert_eq!(task_1.status(), TaskStatus::Running); + assert_eq!(task_2.status(), TaskStatus::Running); + assert_eq!(task_3.status(), TaskStatus::Queued); + + // A pre-run must convert all Running into Queued + harness.task_executor.pre_run().await.unwrap(); + + // 1, 2, 3 - Queued + let task_1 = harness.get_task(task_id_1).await; + let task_2 = harness.get_task(task_id_2).await; + let task_3 = harness.get_task(task_id_3).await; + assert_eq!(task_1.status(), TaskStatus::Queued); + assert_eq!(task_2.status(), TaskStatus::Queued); + assert_eq!(task_3.status(), TaskStatus::Queued); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_run_single_task() { + // Expect the only task to notify about Running and Finished transitions + let mut mock_outbox = MockOutbox::new(); + TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); + + // Expect logical plan runner to run probe + let mut mock_plan_runner = MockTaskLogicalPlanRunner::new(); 
+ TaskExecutorHarness::add_run_probe_plan_expectations( + &mut mock_plan_runner, + Probe::default(), + 1, + ); + + // Schedule the only task + let harness = TaskExecutorHarness::new(mock_outbox, mock_plan_runner); + let task_id = harness.schedule_probe_task().await; + let task = harness.get_task(task_id).await; + assert_eq!(task.status(), TaskStatus::Queued); + + // Run execution loop + harness.task_executor.run_single_task().await.unwrap(); + + // Check the task has Finished status at the end + let task = harness.get_task(task_id).await; + assert_eq!(task.status(), TaskStatus::Finished); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_run_two_of_three_tasks() { + // Expect 2 of 3 tasks to notify about Running and Finished transitions + let mut mock_outbox = MockOutbox::new(); + TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); + TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(1)); + + // Expect logical plan runner to run probe twice + let mut mock_plan_runner = MockTaskLogicalPlanRunner::new(); + TaskExecutorHarness::add_run_probe_plan_expectations( + &mut mock_plan_runner, + Probe::default(), + 2, + ); + + // Schedule 3 tasks + let harness = TaskExecutorHarness::new(mock_outbox, mock_plan_runner); + let task_id_1 = harness.schedule_probe_task().await; + let task_id_2 = harness.schedule_probe_task().await; + let task_id_3 = harness.schedule_probe_task().await; + + // All 3 must be in Queued state before runs + let task_1 = harness.get_task(task_id_1).await; + let task_2 = harness.get_task(task_id_2).await; + let task_3 = harness.get_task(task_id_3).await; + assert_eq!(task_1.status(), TaskStatus::Queued); + assert_eq!(task_2.status(), TaskStatus::Queued); + assert_eq!(task_3.status(), TaskStatus::Queued); + + // Run execution loop twice + harness.task_executor.run_single_task().await.unwrap(); + harness.task_executor.run_single_task().await.unwrap(); + + // Check the 2 tasks Finished, 3rd is still Queued + let task_1 = harness.get_task(task_id_1).await; + let task_2 = harness.get_task(task_id_2).await; + let task_3 = harness.get_task(task_id_3).await; + assert_eq!(task_1.status(), TaskStatus::Finished); + assert_eq!(task_2.status(), TaskStatus::Finished); + assert_eq!(task_3.status(), TaskStatus::Queued); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TaskExecutorHarness { + _catalog: Catalog, + task_executor: Arc, + task_scheduler: Arc, +} + +impl TaskExecutorHarness { + pub fn new(mock_outbox: MockOutbox, mock_plan_runner: MockTaskLogicalPlanRunner) -> Self { + let mut b = CatalogBuilder::new(); + b.add::() + .add::() + .add::() + .add_value(mock_plan_runner) + .bind::() + .add_value(mock_outbox) + .bind::() + .add::(); + + NoOpDatabasePlugin::init_database_components(&mut b); + + let catalog = b.build(); + + let task_executor = catalog.get_one().unwrap(); + let task_scheduler = catalog.get_one().unwrap(); + + Self { + _catalog: catalog, + task_executor, + task_scheduler, + } + } + + async fn schedule_probe_task(&self) -> TaskID { + self.task_scheduler + .create_task(Probe { ..Probe::default() }.into(), None) + .await + .unwrap() + .task_id + } + + async fn try_take_task(&self) -> Option { + self.task_scheduler.try_take().await.unwrap() + } + + async fn get_task(&self, task_id: TaskID) -> TaskState { + 
self.task_scheduler.get_task(task_id).await.unwrap() + } + + fn add_outbox_task_expectations(mock_outbox: &mut MockOutbox, a_task_id: TaskID) { + mock_outbox + .expect_post_message_as_json() + .with( + eq(MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR), + function(move |message_as_json: &serde_json::Value| { + matches!( + serde_json::from_value::(message_as_json.clone()), + Ok(TaskProgressMessage::Running(TaskProgressMessageRunning { + task_id, + .. + })) if task_id == a_task_id + ) + }), + ) + .times(1) + .returning(|_, _| Ok(())); + + mock_outbox + .expect_post_message_as_json() + .with( + eq(MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR), + function(move |message_as_json: &serde_json::Value| { + matches!( + serde_json::from_value::(message_as_json.clone()), + Ok(TaskProgressMessage::Finished(TaskProgressMessageFinished { + task_id, + .. + })) if task_id == a_task_id + ) + }), + ) + .times(1) + .returning(|_, _| Ok(())); + } + + fn add_run_probe_plan_expectations( + mock_plan_runner: &mut MockTaskLogicalPlanRunner, + probe: Probe, + times: usize, + ) { + mock_plan_runner + .expect_run_plan() + .with(eq(LogicalPlan::Probe(probe))) + .times(times) + .returning(|_| Ok(TaskOutcome::Success(TaskResult::Empty))); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +mockall::mock! { + pub TaskLogicalPlanRunner {} + + #[async_trait::async_trait] + impl TaskLogicalPlanRunner for TaskLogicalPlanRunner { + async fn run_plan(&self, logical_plan: &LogicalPlan) -> Result; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/tests/tests/test_task_scheduler_impl.rs b/src/domain/task-system/services/tests/tests/test_task_scheduler_impl.rs index 90d5f7027..57bd28fad 100644 --- a/src/domain/task-system/services/tests/tests/test_task_scheduler_impl.rs +++ b/src/domain/task-system/services/tests/tests/test_task_scheduler_impl.rs @@ -10,8 +10,8 @@ use std::assert_matches::assert_matches; use std::sync::Arc; -use kamu_task_system::{LogicalPlan, Probe, TaskScheduler, TaskState, TaskStatus}; -use kamu_task_system_inmem::InMemoryTaskSystemEventStore; +use kamu_task_system::{LogicalPlan, Probe, TaskMetadata, TaskScheduler, TaskState, TaskStatus}; +use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; use time_source::SystemTimeSourceStub; @@ -23,72 +23,123 @@ async fn test_creates_task() { let logical_plan_expected: LogicalPlan = Probe { ..Probe::default() }.into(); + let metadata_expected = TaskMetadata::from(vec![("foo", "x"), ("bar", "y")]); + let task_state_actual = task_sched - .create_task(logical_plan_expected.clone()) + .create_task( + logical_plan_expected.clone(), + Some(metadata_expected.clone()), + ) .await .unwrap(); assert_matches!(task_state_actual, TaskState { - status: TaskStatus::Queued, + outcome: None, cancellation_requested: false, logical_plan, + metadata, ran_at: None, cancellation_requested_at: None, finished_at: None, .. 
- } if logical_plan == logical_plan_expected); + } if logical_plan == logical_plan_expected && metadata == metadata_expected ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_queues_tasks() { let task_sched = create_task_scheduler(); + let maybe_task_0 = task_sched.try_take().await.unwrap(); + assert!(maybe_task_0.is_none()); + let task_id_1 = task_sched - .create_task(Probe { ..Probe::default() }.into()) + .create_task(Probe { ..Probe::default() }.into(), None) .await .unwrap() .task_id; let task_id_2 = task_sched - .create_task(Probe { ..Probe::default() }.into()) + .create_task(Probe { ..Probe::default() }.into(), None) .await .unwrap() .task_id; - assert_eq!(task_sched.try_take().await.unwrap(), Some(task_id_1)); - assert_eq!(task_sched.try_take().await.unwrap(), Some(task_id_2)); - assert_eq!(task_sched.try_take().await.unwrap(), None); + let maybe_task_1 = task_sched.try_take().await.unwrap(); + assert!(maybe_task_1.is_some_and(|t| t.task_id == task_id_1)); + + let maybe_task_2 = task_sched.try_take().await.unwrap(); + assert!(maybe_task_2.is_some_and(|t| t.task_id == task_id_2)); + + let maybe_task_3 = task_sched.try_take().await.unwrap(); + assert!(maybe_task_3.is_none()); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_task_taken_task_is_running() { + let task_sched = create_task_scheduler(); + + let task_id_1 = task_sched + .create_task(Probe { ..Probe::default() }.into(), None) + .await + .unwrap() + .task_id; + + let task_id_2 = task_sched + .create_task(Probe { ..Probe::default() }.into(), None) + .await + .unwrap() + .task_id; + + let task_1 = task_sched.get_task(task_id_1).await.unwrap(); + let task_2 = task_sched.get_task(task_id_2).await.unwrap(); + assert_eq!(task_1.status(), TaskStatus::Queued); + assert_eq!(task_2.status(), TaskStatus::Queued); + + task_sched.try_take().await.unwrap(); + + let task_1 = task_sched.get_task(task_id_1).await.unwrap(); + let task_2 = task_sched.get_task(task_id_2).await.unwrap(); + assert_eq!(task_1.status(), TaskStatus::Running); + assert_eq!(task_2.status(), TaskStatus::Queued); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_task_cancellation() { let task_sched = create_task_scheduler(); let task_id_1 = task_sched - .create_task(Probe { ..Probe::default() }.into()) + .create_task(Probe { ..Probe::default() }.into(), None) .await .unwrap() .task_id; let task_id_2 = task_sched - .create_task(Probe { ..Probe::default() }.into()) + .create_task(Probe { ..Probe::default() }.into(), None) .await .unwrap() .task_id; task_sched.cancel_task(task_id_1).await.unwrap(); - assert_eq!(task_sched.try_take().await.unwrap(), Some(task_id_2)); - assert_eq!(task_sched.try_take().await.unwrap(), None); + let maybe_task = task_sched.try_take().await.unwrap(); + assert!(maybe_task.is_some_and(|t| t.task_id == task_id_2)); + + let maybe_another_task = task_sched.try_take().await.unwrap(); + assert!(maybe_another_task.is_none()); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// fn create_task_scheduler() -> impl TaskScheduler { - let event_store = Arc::new(InMemoryTaskSystemEventStore::new()); + let task_event_store = 
Arc::new(InMemoryTaskEventStore::new()); let time_source = Arc::new(SystemTimeSourceStub::new()); - - TaskSchedulerImpl::new(event_store, time_source) + TaskSchedulerImpl::new(task_event_store, time_source) } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/accounts/inmem/src/repos/inmem_access_token_repository.rs b/src/infra/accounts/inmem/src/repos/inmem_access_token_repository.rs index 96dfd826a..de9bafa0a 100644 --- a/src/infra/accounts/inmem/src/repos/inmem_access_token_repository.rs +++ b/src/infra/accounts/inmem/src/repos/inmem_access_token_repository.rs @@ -12,7 +12,7 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use chrono::{DateTime, Utc}; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::*; use internal_error::ErrorIntoInternal; use kamu_accounts::AccessToken; @@ -132,7 +132,7 @@ impl AccessTokenRepository for InMemoryAccessTokenRepository { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetAccessTokenError> { let guard = self.state.lock().unwrap(); @@ -140,8 +140,8 @@ impl AccessTokenRepository for InMemoryAccessTokenRepository { let access_tokens: Vec<_> = access_token_ids .iter() .map(|token_id| guard.tokens_by_id.get(token_id).unwrap().clone()) - .skip(usize::try_from(pagination.offset).unwrap()) - .take(usize::try_from(pagination.limit).unwrap()) + .skip(pagination.offset) + .take(pagination.limit) .collect(); return Ok(access_tokens); } diff --git a/src/infra/accounts/mysql/src/repos/mysql_access_token_repository.rs b/src/infra/accounts/mysql/src/repos/mysql_access_token_repository.rs index 513574baa..4a9fa00e4 100644 --- a/src/infra/accounts/mysql/src/repos/mysql_access_token_repository.rs +++ b/src/infra/accounts/mysql/src/repos/mysql_access_token_repository.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. use chrono::{DateTime, Utc}; -use database_common::{DatabasePaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use opendatafabric::AccountID; @@ -150,7 +150,7 @@ impl AccessTokenRepository for MySqlAccessTokenRepository { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetAccessTokenError> { let mut tr = self.transaction.lock().await; @@ -174,8 +174,8 @@ impl AccessTokenRepository for MySqlAccessTokenRepository { LIMIT ? OFFSET ? 
"#, account_id.to_string(), - pagination.limit, - pagination.offset, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), ) .fetch_all(connection_mut) .await diff --git a/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs b/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs index 4bd5be179..46690322f 100644 --- a/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs +++ b/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs @@ -71,7 +71,7 @@ impl AccountRepository for MySqlAccountRepository { CreateAccountDuplicateField::Name } else if mysql_error_message.contains("for key 'idx_accounts_email'") { CreateAccountDuplicateField::Email - } else if mysql_error_message.contains("for key 'idx_provider_identity_key'") { + } else if mysql_error_message.contains("for key 'idx_accounts_provider_identity_key'") { CreateAccountDuplicateField::ProviderIdentityKey } else { tracing::error!("Unexpected MySQL error message: {}", mysql_error_message); diff --git a/src/infra/accounts/postgres/src/repos/postgres_access_token_repository.rs b/src/infra/accounts/postgres/src/repos/postgres_access_token_repository.rs index 5190e6962..88985d6cf 100644 --- a/src/infra/accounts/postgres/src/repos/postgres_access_token_repository.rs +++ b/src/infra/accounts/postgres/src/repos/postgres_access_token_repository.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. use chrono::{DateTime, Utc}; -use database_common::{DatabasePaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use opendatafabric::AccountID; @@ -152,7 +152,7 @@ impl AccessTokenRepository for PostgresAccessTokenRepository { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetAccessTokenError> { let mut tr = self.transaction.lock().await; @@ -176,8 +176,8 @@ impl AccessTokenRepository for PostgresAccessTokenRepository { LIMIT $2 OFFSET $3 "#, account_id.to_string(), - pagination.limit, - pagination.offset, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), ) .fetch_all(connection_mut) .await diff --git a/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs b/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs index ac4404b54..74e9671c5 100644 --- a/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs +++ b/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs @@ -66,7 +66,7 @@ impl AccountRepository for PostgresAccountRepository { Some("accounts_pkey") => CreateAccountDuplicateField::Id, Some("idx_accounts_email") => CreateAccountDuplicateField::Email, Some("idx_accounts_name") => CreateAccountDuplicateField::Name, - Some("idx_provider_identity_key") => CreateAccountDuplicateField::ProviderIdentityKey, + Some("idx_accounts_provider_identity_key") => CreateAccountDuplicateField::ProviderIdentityKey, _ => { tracing::error!("Unexpected Postgres error message: {}", e.message()); CreateAccountDuplicateField::Id diff --git a/src/infra/accounts/repo-tests/src/access_token_repository_test_suite.rs b/src/infra/accounts/repo-tests/src/access_token_repository_test_suite.rs index 45b5b12d0..a6172da7f 100644 --- a/src/infra/accounts/repo-tests/src/access_token_repository_test_suite.rs +++ 
b/src/infra/accounts/repo-tests/src/access_token_repository_test_suite.rs @@ -10,7 +10,7 @@ use std::assert_matches::assert_matches; use chrono::{SubsecRound, Utc}; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::Catalog; use kamu_accounts::*; use uuid::Uuid; @@ -93,7 +93,7 @@ pub async fn test_insert_and_locate_multiple_access_tokens(catalog: &Catalog) { let mut db_access_tokens = access_token_repo .get_access_tokens_by_account_id( &account.id, - &DatabasePaginationOpts { + &PaginationOpts { limit: 10, offset: 0, }, diff --git a/src/infra/accounts/sqlite/src/repos/sqlite_access_token_repository.rs b/src/infra/accounts/sqlite/src/repos/sqlite_access_token_repository.rs index d91be2bf4..34a926d1f 100644 --- a/src/infra/accounts/sqlite/src/repos/sqlite_access_token_repository.rs +++ b/src/infra/accounts/sqlite/src/repos/sqlite_access_token_repository.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. use chrono::{DateTime, Utc}; -use database_common::{DatabasePaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use opendatafabric::AccountID; @@ -164,7 +164,7 @@ impl AccessTokenRepository for SqliteAccessTokenRepository { async fn get_access_tokens_by_account_id( &self, account_id: &AccountID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetAccessTokenError> { let mut tr = self.transaction.lock().await; @@ -172,8 +172,8 @@ impl AccessTokenRepository for SqliteAccessTokenRepository { .connection_mut() .await .map_err(GetAccessTokenError::Internal)?; - let limit = pagination.limit; - let offset = pagination.offset; + let limit = i64::try_from(pagination.limit).unwrap(); + let offset = i64::try_from(pagination.offset).unwrap(); let account_id_string = account_id.to_string(); let access_token_rows = sqlx::query_as!( diff --git a/src/infra/auth-rebac/inmem/tests/repos/test_inmem_rebac_repository.rs b/src/infra/auth-rebac/inmem/tests/repos/test_inmem_rebac_repository.rs index aa9be1cef..1a0983e21 100644 --- a/src/infra/auth-rebac/inmem/tests/repos/test_inmem_rebac_repository.rs +++ b/src/infra/auth-rebac/inmem/tests/repos/test_inmem_rebac_repository.rs @@ -92,7 +92,6 @@ struct InMemoryRebacRepositoryHarness { impl InMemoryRebacRepositoryHarness { pub fn new() -> Self { let mut catalog_builder = CatalogBuilder::new(); - catalog_builder.add::(); Self { diff --git a/src/infra/core/src/sync_service_impl.rs b/src/infra/core/src/sync_service_impl.rs index 63e357994..efa3bb4d0 100644 --- a/src/infra/core/src/sync_service_impl.rs +++ b/src/infra/core/src/sync_service_impl.rs @@ -21,13 +21,14 @@ use super::utils::smart_transfer_protocol::SmartTransferProtocolClient; use crate::utils::ipfs_wrapper::*; use crate::utils::simple_transfer_protocol::{DatasetFactoryFn, SimpleTransferProtocol}; use crate::utils::smart_transfer_protocol::TransferOptions; +use crate::DatasetRepositoryWriter; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct SyncServiceImpl { remote_repo_reg: Arc, dataset_repo: Arc, - create_dataset_use_case: Arc, + dataset_repo_writer: Arc, dataset_action_authorizer: Arc, dataset_factory: Arc, smart_transfer_protocol: Arc, @@ -42,7 +43,7 @@ impl SyncServiceImpl { pub fn new( remote_repo_reg: Arc, dataset_repo: Arc, - create_dataset_use_case: Arc, + 
dataset_repo_writer: Arc, dataset_action_authorizer: Arc, dataset_factory: Arc, smart_transfer_protocol: Arc, @@ -51,7 +52,7 @@ impl SyncServiceImpl { Self { remote_repo_reg, dataset_repo, - create_dataset_use_case, + dataset_repo_writer, dataset_action_authorizer, dataset_factory, smart_transfer_protocol, @@ -144,7 +145,7 @@ impl SyncServiceImpl { } Err(GetDatasetError::NotFound(_)) if create_if_not_exists => { let alias = local_ref.alias().unwrap().clone(); - let create_dataset_use_case = self.create_dataset_use_case.clone(); + let repo_writer = self.dataset_repo_writer.clone(); Ok(( None, @@ -152,13 +153,11 @@ impl SyncServiceImpl { Box::pin(async move { // After retrieving the dataset externally, we default to // private visibility. - let create_options = CreateDatasetUseCaseOptions { + /*let create_options = CreateDatasetUseCaseOptions { dataset_visibility: DatasetVisibility::Private, - }; + };*/ - create_dataset_use_case - .execute(&alias, seed_block, create_options) - .await + repo_writer.create_dataset(&alias, seed_block).await }) })), )) diff --git a/src/infra/core/tests/tests/test_pull_service_impl.rs b/src/infra/core/tests/tests/test_pull_service_impl.rs index 76773632d..f8efabea8 100644 --- a/src/infra/core/tests/tests/test_pull_service_impl.rs +++ b/src/infra/core/tests/tests/test_pull_service_impl.rs @@ -19,7 +19,6 @@ use kamu::domain::*; use kamu::testing::*; use kamu::*; use kamu_accounts::{CurrentAccountSubject, DEFAULT_ACCOUNT_NAME_STR}; -use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; @@ -146,7 +145,7 @@ async fn create_graph( // remote dataset async fn create_graph_remote( dataset_repo: Arc, - create_dataset_use_case: Arc, + dataset_repo_writer: Arc, reg: Arc, datasets: Vec<(DatasetAlias, Vec)>, to_import: Vec, @@ -173,7 +172,7 @@ async fn create_graph_remote( let sync_service = SyncServiceImpl::new( reg.clone(), dataset_repo, - create_dataset_use_case, + dataset_repo_writer, Arc::new(auth::AlwaysHappyDatasetActionAuthorizer::new()), Arc::new(DatasetFactoryImpl::new( IpfsGateway::default(), @@ -374,7 +373,7 @@ async fn test_pull_batching_complex_with_remote() { // D -----------/ create_graph_remote( harness.dataset_repo.clone(), - harness.create_dataset_use_case.clone(), + harness.dataset_repo.clone(), harness.remote_repo_reg.clone(), vec![ (n!("a"), names![]), @@ -875,7 +874,6 @@ struct PullTestHarness { remote_repo_reg: Arc, remote_alias_reg: Arc, pull_svc: Arc, - create_dataset_use_case: Arc, } impl PullTestHarness { @@ -919,8 +917,6 @@ impl PullTestHarness { .add_builder(TestSyncService::builder().with_calls(calls.clone())) .bind::() .add::() - .add::() - .add::() .build(); Self { @@ -929,7 +925,6 @@ impl PullTestHarness { remote_repo_reg: catalog.get_one().unwrap(), remote_alias_reg: catalog.get_one().unwrap(), pull_svc: catalog.get_one().unwrap(), - create_dataset_use_case: catalog.get_one().unwrap(), } } diff --git a/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs b/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs index c7f728d51..8bb6bfccf 100644 --- a/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs +++ b/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs @@ -11,7 +11,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::{Arc, Mutex}; -use database_common::DatabasePaginationOpts; +use database_common::PaginationOpts; use dill::*; use opendatafabric::DatasetID; use uuid::Uuid; @@ -103,7 
+103,7 @@ impl DatasetEnvVarRepository for InMemoryDatasetEnvVarRepository { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetDatasetEnvVarError> { let guard = self.state.lock().unwrap(); if let Some(dataset_env_var_ids) = guard.dataset_env_var_ids_by_dataset_id.get(dataset_id) { @@ -116,8 +116,8 @@ impl DatasetEnvVarRepository for InMemoryDatasetEnvVarRepository { .unwrap() .clone() }) - .skip(usize::try_from(pagination.offset).unwrap()) - .take(usize::try_from(pagination.limit).unwrap()) + .skip(pagination.offset) + .take(pagination.limit) .collect(); return Ok(dataset_env_vars); } diff --git a/src/infra/datasets/postgres/.sqlx/query-614cdbedfa4b8811d3ec3b73d04816c3519ec95bb9554a55f2e5a966ad4085f8.json b/src/infra/datasets/postgres/.sqlx/query-1edeeb7a5a7cd1622c3677f517050b2f2f20186e5d604dd2378a5d492624cd22.json similarity index 66% rename from src/infra/datasets/postgres/.sqlx/query-614cdbedfa4b8811d3ec3b73d04816c3519ec95bb9554a55f2e5a966ad4085f8.json rename to src/infra/datasets/postgres/.sqlx/query-1edeeb7a5a7cd1622c3677f517050b2f2f20186e5d604dd2378a5d492624cd22.json index 68167c358..ec921308f 100644 --- a/src/infra/datasets/postgres/.sqlx/query-614cdbedfa4b8811d3ec3b73d04816c3519ec95bb9554a55f2e5a966ad4085f8.json +++ b/src/infra/datasets/postgres/.sqlx/query-1edeeb7a5a7cd1622c3677f517050b2f2f20186e5d604dd2378a5d492624cd22.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE id = $1\n ", + "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE id = $1\n ", "describe": { "columns": [ { @@ -48,5 +48,5 @@ false ] }, - "hash": "614cdbedfa4b8811d3ec3b73d04816c3519ec95bb9554a55f2e5a966ad4085f8" + "hash": "1edeeb7a5a7cd1622c3677f517050b2f2f20186e5d604dd2378a5d492624cd22" } diff --git a/src/infra/datasets/postgres/.sqlx/query-75559118bbccfb88b1ecfd3ff44e49438d13be39bdcc2be800871f53177b2c4e.json b/src/infra/datasets/postgres/.sqlx/query-75559118bbccfb88b1ecfd3ff44e49438d13be39bdcc2be800871f53177b2c4e.json deleted file mode 100644 index bf7804ec2..000000000 --- a/src/infra/datasets/postgres/.sqlx/query-75559118bbccfb88b1ecfd3ff44e49438d13be39bdcc2be800871f53177b2c4e.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n count(*)\n FROM dataset_env_vars\n WHERE dataset_id = $1\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "count", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [ - null - ] - }, - "hash": "75559118bbccfb88b1ecfd3ff44e49438d13be39bdcc2be800871f53177b2c4e" -} diff --git a/src/infra/datasets/postgres/.sqlx/query-55e81288f0c5757d87241b9a627456a55185ee1f47e608ecdd1b2872f084c603.json b/src/infra/datasets/postgres/.sqlx/query-ba4b58ace753056170758ba8dd7d8ee243743c12f8f78060f1218ea82e101ebd.json similarity index 62% rename from src/infra/datasets/postgres/.sqlx/query-55e81288f0c5757d87241b9a627456a55185ee1f47e608ecdd1b2872f084c603.json rename to src/infra/datasets/postgres/.sqlx/query-ba4b58ace753056170758ba8dd7d8ee243743c12f8f78060f1218ea82e101ebd.json index 6b4002a47..a0f0b6449 100644 --- a/src/infra/datasets/postgres/.sqlx/query-55e81288f0c5757d87241b9a627456a55185ee1f47e608ecdd1b2872f084c603.json +++ 
b/src/infra/datasets/postgres/.sqlx/query-ba4b58ace753056170758ba8dd7d8ee243743c12f8f78060f1218ea82e101ebd.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE dataset_id = $1\n and key = $2\n ", + "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE dataset_id = $1\n and key = $2\n ", "describe": { "columns": [ { @@ -49,5 +49,5 @@ false ] }, - "hash": "55e81288f0c5757d87241b9a627456a55185ee1f47e608ecdd1b2872f084c603" + "hash": "ba4b58ace753056170758ba8dd7d8ee243743c12f8f78060f1218ea82e101ebd" } diff --git a/src/infra/task-system/postgres/.sqlx/query-833cd145ab5b1ca276878c1041c973e52b6966fb6011c4937665fbe75766785a.json b/src/infra/datasets/postgres/.sqlx/query-d0fd342b03591da7b2507647e7e8198566a1cd4a70bb771fb674e0753dab39c6.json similarity index 54% rename from src/infra/task-system/postgres/.sqlx/query-833cd145ab5b1ca276878c1041c973e52b6966fb6011c4937665fbe75766785a.json rename to src/infra/datasets/postgres/.sqlx/query-d0fd342b03591da7b2507647e7e8198566a1cd4a70bb771fb674e0753dab39c6.json index 5ddb49a36..3363360d8 100644 --- a/src/infra/task-system/postgres/.sqlx/query-833cd145ab5b1ca276878c1041c973e52b6966fb6011c4937665fbe75766785a.json +++ b/src/infra/datasets/postgres/.sqlx/query-d0fd342b03591da7b2507647e7e8198566a1cd4a70bb771fb674e0753dab39c6.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT COUNT(event_id) FROM task_events\n WHERE dataset_id = $1\n ", + "query": "\n SELECT\n count(*)\n FROM dataset_env_vars\n WHERE dataset_id = $1\n ", "describe": { "columns": [ { @@ -18,5 +18,5 @@ null ] }, - "hash": "833cd145ab5b1ca276878c1041c973e52b6966fb6011c4937665fbe75766785a" + "hash": "d0fd342b03591da7b2507647e7e8198566a1cd4a70bb771fb674e0753dab39c6" } diff --git a/src/infra/datasets/postgres/.sqlx/query-4118010163b167f384a8af15ea0046dc9a40aa97ed58791a6e93a5c7a0f12482.json b/src/infra/datasets/postgres/.sqlx/query-fa625ca056ddf70e90ebce50e66c910274d6d6108de559bac5e21c6d5bbc4df8.json similarity index 62% rename from src/infra/datasets/postgres/.sqlx/query-4118010163b167f384a8af15ea0046dc9a40aa97ed58791a6e93a5c7a0f12482.json rename to src/infra/datasets/postgres/.sqlx/query-fa625ca056ddf70e90ebce50e66c910274d6d6108de559bac5e21c6d5bbc4df8.json index 681bcd33a..3763df35b 100644 --- a/src/infra/datasets/postgres/.sqlx/query-4118010163b167f384a8af15ea0046dc9a40aa97ed58791a6e93a5c7a0f12482.json +++ b/src/infra/datasets/postgres/.sqlx/query-fa625ca056ddf70e90ebce50e66c910274d6d6108de559bac5e21c6d5bbc4df8.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE dataset_id = $1\n LIMIT $2 OFFSET $3\n ", + "query": "\n SELECT\n id,\n key,\n value as \"value: _\",\n secret_nonce,\n created_at,\n dataset_id as \"dataset_id: _\"\n FROM dataset_env_vars\n WHERE dataset_id = $1\n LIMIT $2 OFFSET $3\n ", "describe": { "columns": [ { @@ -50,5 +50,5 @@ false ] }, - "hash": "4118010163b167f384a8af15ea0046dc9a40aa97ed58791a6e93a5c7a0f12482" + "hash": "fa625ca056ddf70e90ebce50e66c910274d6d6108de559bac5e21c6d5bbc4df8" } diff --git a/src/infra/datasets/postgres/src/repos/postgres_dataset_env_var_repository.rs b/src/infra/datasets/postgres/src/repos/postgres_dataset_env_var_repository.rs index 
80ae2cefc..e93e9de4c 100644 --- a/src/infra/datasets/postgres/src/repos/postgres_dataset_env_var_repository.rs +++ b/src/infra/datasets/postgres/src/repos/postgres_dataset_env_var_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::{DatabasePaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use opendatafabric::DatasetID; @@ -78,7 +78,7 @@ impl DatasetEnvVarRepository for PostgresDatasetEnvVarRepository { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetDatasetEnvVarError> { let mut tr = self.transaction.lock().await; @@ -90,20 +90,20 @@ impl DatasetEnvVarRepository for PostgresDatasetEnvVarRepository { let dataset_env_var_rows = sqlx::query_as!( DatasetEnvVarRowModel, r#" - SELECT - id, - key, - value as "value: _", - secret_nonce, - created_at, - dataset_id as "dataset_id: _" - FROM dataset_env_vars - WHERE dataset_id = $1 - LIMIT $2 OFFSET $3 - "#, + SELECT + id, + key, + value as "value: _", + secret_nonce, + created_at, + dataset_id as "dataset_id: _" + FROM dataset_env_vars + WHERE dataset_id = $1 + LIMIT $2 OFFSET $3 + "#, dataset_id.to_string(), - pagination.limit, - pagination.offset, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), ) .fetch_all(connection_mut) .await @@ -126,10 +126,10 @@ impl DatasetEnvVarRepository for PostgresDatasetEnvVarRepository { let dataset_env_vars_count = sqlx::query_scalar!( r#" - SELECT - count(*) - FROM dataset_env_vars - WHERE dataset_id = $1 + SELECT + count(*) + FROM dataset_env_vars + WHERE dataset_id = $1 "#, dataset_id.to_string(), ) @@ -156,17 +156,17 @@ impl DatasetEnvVarRepository for PostgresDatasetEnvVarRepository { let dataset_env_var_row_maybe = sqlx::query_as!( DatasetEnvVarRowModel, r#" - SELECT - id, - key, - value as "value: _", - secret_nonce, - created_at, - dataset_id as "dataset_id: _" - FROM dataset_env_vars - WHERE dataset_id = $1 - and key = $2 - "#, + SELECT + id, + key, + value as "value: _", + secret_nonce, + created_at, + dataset_id as "dataset_id: _" + FROM dataset_env_vars + WHERE dataset_id = $1 + and key = $2 + "#, dataset_id.to_string(), dataset_env_var_key, ) @@ -199,16 +199,16 @@ impl DatasetEnvVarRepository for PostgresDatasetEnvVarRepository { let dataset_env_var_row_maybe = sqlx::query_as!( DatasetEnvVarRowModel, r#" - SELECT - id, - key, - value as "value: _", - secret_nonce, - created_at, - dataset_id as "dataset_id: _" - FROM dataset_env_vars - WHERE id = $1 - "#, + SELECT + id, + key, + value as "value: _", + secret_nonce, + created_at, + dataset_id as "dataset_id: _" + FROM dataset_env_vars + WHERE id = $1 + "#, dataset_env_var_id, ) .fetch_optional(connection_mut) diff --git a/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs b/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs index ef93b7997..9d05a20fa 100644 --- a/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs +++ b/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs @@ -10,7 +10,7 @@ use std::assert_matches::assert_matches; use chrono::{SubsecRound, Utc}; -use database_common::DatabasePaginationOpts; +use 
database_common::PaginationOpts; use dill::Catalog; use kamu_datasets::{ DatasetEnvVar, @@ -48,7 +48,7 @@ pub async fn test_missing_dataset_env_var_not_found(catalog: &Catalog) { let dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( &DatasetID::new_seeded_ed25519(b"foo"), - &DatabasePaginationOpts { + &PaginationOpts { offset: 0, limit: 5, }, @@ -97,7 +97,7 @@ pub async fn test_insert_and_get_dataset_env_var(catalog: &Catalog) { let db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( &dataset_id, - &DatabasePaginationOpts { + &PaginationOpts { offset: 0, limit: 5, }, @@ -151,7 +151,7 @@ pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { let mut db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( &dataset_id, - &DatabasePaginationOpts { + &PaginationOpts { offset: 0, limit: 5, }, @@ -219,7 +219,7 @@ pub async fn test_delete_dataset_env_vars(catalog: &Catalog) { let db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( &dataset_id, - &DatabasePaginationOpts { + &PaginationOpts { offset: 0, limit: 5, }, diff --git a/src/infra/datasets/sqlite/src/repos/sqlite_dataset_env_var_repository.rs b/src/infra/datasets/sqlite/src/repos/sqlite_dataset_env_var_repository.rs index 34fc66f7a..04eb00830 100644 --- a/src/infra/datasets/sqlite/src/repos/sqlite_dataset_env_var_repository.rs +++ b/src/infra/datasets/sqlite/src/repos/sqlite_dataset_env_var_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::{DatabasePaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use opendatafabric::DatasetID; @@ -84,7 +84,7 @@ impl DatasetEnvVarRepository for SqliteDatasetEnvVarRepository { async fn get_all_dataset_env_vars_by_dataset_id( &self, dataset_id: &DatasetID, - pagination: &DatabasePaginationOpts, + pagination: &PaginationOpts, ) -> Result, GetDatasetEnvVarError> { let mut tr = self.transaction.lock().await; @@ -92,8 +92,8 @@ impl DatasetEnvVarRepository for SqliteDatasetEnvVarRepository { .connection_mut() .await .map_err(GetDatasetEnvVarError::Internal)?; - let limit = pagination.limit; - let offset = pagination.offset; + let limit = i64::try_from(pagination.limit).unwrap(); + let offset = i64::try_from(pagination.offset).unwrap(); let dataset_id_string = dataset_id.to_string(); let dataset_env_var_rows = sqlx::query_as!( diff --git a/src/infra/flow-system/inmem/Cargo.toml b/src/infra/flow-system/inmem/Cargo.toml index 32fe4230d..91786d0fb 100644 --- a/src/infra/flow-system/inmem/Cargo.toml +++ b/src/infra/flow-system/inmem/Cargo.toml @@ -22,6 +22,8 @@ doctest = false [dependencies] +database-common = { workspace = true } +internal-error = { workspace = true } opendatafabric = { workspace = true } kamu-task-system = { workspace = true } kamu-flow-system = { workspace = true } @@ -45,7 +47,6 @@ serde_with = { version = "3", default-features = false } [dev-dependencies] database-common-macros = { workspace = true } kamu-flow-system-repo-tests = { workspace = true } -kamu-task-system-inmem = { workspace = true } cron = { version = "0.12", default-features = false } tempfile = "3" diff --git a/src/infra/flow-system/inmem/src/flow/inmem_flow_event_store.rs 
b/src/infra/flow-system/inmem/src/flow/inmem_flow_event_store.rs index 5e049082f..9eb3b4ff7 100644 --- a/src/infra/flow-system/inmem/src/flow/inmem_flow_event_store.rs +++ b/src/infra/flow-system/inmem/src/flow/inmem_flow_event_store.rs @@ -10,6 +10,7 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; +use database_common::PaginationOpts; use dill::*; use kamu_flow_system::{BorrowedFlowKeyDataset, *}; use opendatafabric::{AccountID, DatasetID}; @@ -62,6 +63,14 @@ impl State { false } } + + fn matches_any_flow(&self, flow_id: FlowID, filters: &AllFlowFilters) -> bool { + if let Some(index_entry) = self.flow_search_index.get(&flow_id) { + index_entry.matches_all_flow_filters(filters) + } else { + false + } + } } impl EventStoreState for State { @@ -97,6 +106,11 @@ impl FlowIndexEntry { && self.initiator_matches(filters.by_initiator.as_ref()) } + pub fn matches_all_flow_filters(&self, filters: &AllFlowFilters) -> bool { + self.flow_status_matches(filters.by_flow_status) + && self.initiator_matches(filters.by_initiator.as_ref()) + } + fn dataset_flow_type_matches( &self, maybe_dataset_flow_type_filter: Option, @@ -257,8 +271,8 @@ impl EventStore for InMemoryFlowEventStore { } #[tracing::instrument(level = "debug", skip_all, fields(%query, ?opts))] - async fn get_events(&self, query: &FlowID, opts: GetEventsOpts) -> EventStream { - self.inner.get_events(query, opts).await + fn get_events(&self, query: &FlowID, opts: GetEventsOpts) -> EventStream { + self.inner.get_events(query, opts) } #[tracing::instrument(level = "debug", skip_all, fields(%query, num_events = events.len()))] @@ -284,8 +298,65 @@ impl EventStore for InMemoryFlowEventStore { #[async_trait::async_trait] impl FlowEventStore for InMemoryFlowEventStore { #[tracing::instrument(level = "debug", skip_all)] - fn new_flow_id(&self) -> FlowID { - self.inner.as_state().lock().unwrap().next_flow_id() + async fn new_flow_id(&self) -> Result { + Ok(self.inner.as_state().lock().unwrap().next_flow_id()) + } + + async fn try_get_pending_flow( + &self, + flow_key: &FlowKey, + ) -> Result, InternalError> { + let state = self.inner.as_state(); + let g = state.lock().unwrap(); + + Ok(match flow_key { + FlowKey::Dataset(flow_key) => { + let waiting_filter = DatasetFlowFilters { + by_flow_type: Some(flow_key.flow_type), + by_flow_status: Some(FlowStatus::Waiting), + by_initiator: None, + }; + + let running_filter = DatasetFlowFilters { + by_flow_type: Some(flow_key.flow_type), + by_flow_status: Some(FlowStatus::Running), + by_initiator: None, + }; + + g.all_flows_by_dataset + .get(&flow_key.dataset_id) + .map(|dataset_flow_ids| { + dataset_flow_ids.iter().rev().find(|flow_id| { + g.matches_dataset_flow(**flow_id, &waiting_filter) + || g.matches_dataset_flow(**flow_id, &running_filter) + }) + }) + .unwrap_or_default() + .copied() + } + FlowKey::System(flow_key) => { + let waiting_filter = SystemFlowFilters { + by_flow_type: Some(flow_key.flow_type), + by_flow_status: Some(FlowStatus::Waiting), + by_initiator: None, + }; + + let running_filter = SystemFlowFilters { + by_flow_type: Some(flow_key.flow_type), + by_flow_status: Some(FlowStatus::Running), + by_initiator: None, + }; + + g.all_system_flows + .iter() + .rev() + .find(|flow_id| { + g.matches_system_flow(**flow_id, &waiting_filter) + || g.matches_system_flow(**flow_id, &running_filter) + }) + .copied() + } + }) } #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id, ?flow_type))] @@ -319,8 +390,8 @@ impl FlowEventStore for InMemoryFlowEventStore { 
fn get_all_flow_ids_by_dataset( &self, dataset_id: &DatasetID, - filters: DatasetFlowFilters, - pagination: FlowPaginationOpts, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, ) -> FlowIDStream { let flow_ids_page: Vec<_> = { let state = self.inner.as_state(); @@ -331,7 +402,7 @@ impl FlowEventStore for InMemoryFlowEventStore { dataset_flow_ids .iter() .rev() - .filter(|flow_id| g.matches_dataset_flow(**flow_id, &filters)) + .filter(|flow_id| g.matches_dataset_flow(**flow_id, filters)) .skip(pagination.offset) .take(pagination.limit) .map(|flow_id| Ok(*flow_id)) @@ -383,7 +454,7 @@ impl FlowEventStore for InMemoryFlowEventStore { &self, dataset_ids: HashSet, filters: &DatasetFlowFilters, - pagination: FlowPaginationOpts, + pagination: PaginationOpts, ) -> FlowIDStream { let flow_ids_page: Vec<_> = { let state = self.inner.as_state(); @@ -435,8 +506,8 @@ impl FlowEventStore for InMemoryFlowEventStore { #[tracing::instrument(level = "debug", skip_all, fields(?filters, ?pagination))] fn get_all_system_flow_ids( &self, - filters: SystemFlowFilters, - pagination: FlowPaginationOpts, + filters: &SystemFlowFilters, + pagination: PaginationOpts, ) -> FlowIDStream { let flow_ids_page: Vec<_> = { let state = self.inner.as_state(); @@ -444,7 +515,7 @@ impl FlowEventStore for InMemoryFlowEventStore { g.all_system_flows .iter() .rev() - .filter(|flow_id| g.matches_system_flow(**flow_id, &filters)) + .filter(|flow_id| g.matches_system_flow(**flow_id, filters)) .skip(pagination.offset) .take(pagination.limit) .map(|flow_id| Ok(*flow_id)) @@ -467,27 +538,34 @@ impl FlowEventStore for InMemoryFlowEventStore { } #[tracing::instrument(level = "debug", skip_all, fields(?pagination))] - fn get_all_flow_ids(&self, pagination: FlowPaginationOpts) -> FlowIDStream { + fn get_all_flow_ids( + &self, + filters: &AllFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { let flow_ids_page: Vec<_> = { let state = self.inner.as_state(); let g = state.lock().unwrap(); g.all_flows .iter() .rev() + .filter(|flow_id| g.matches_any_flow(**flow_id, filters)) .skip(pagination.offset) .take(pagination.limit) .map(|flow_id| Ok(*flow_id)) .collect() }; - Box::pin(futures::stream::iter(flow_ids_page)) } #[tracing::instrument(level = "debug", skip_all)] - async fn get_count_all_flows(&self) -> Result { + async fn get_count_all_flows(&self, filters: &AllFlowFilters) -> Result { let state = self.inner.as_state(); let g = state.lock().unwrap(); - Ok(g.all_flows.len()) + Ok(g.all_flows + .iter() + .filter(|flow_id| g.matches_any_flow(**flow_id, filters)) + .count()) } } diff --git a/src/infra/flow-system/inmem/src/flow_configuration/inmem_flow_configuration_event_store.rs b/src/infra/flow-system/inmem/src/flow_configuration/inmem_flow_configuration_event_store.rs index 7b8706233..00201fa44 100644 --- a/src/infra/flow-system/inmem/src/flow_configuration/inmem_flow_configuration_event_store.rs +++ b/src/infra/flow-system/inmem/src/flow_configuration/inmem_flow_configuration_event_store.rs @@ -62,12 +62,12 @@ impl EventStore for InMemoryFlowConfigurationEventStore } #[tracing::instrument(level = "debug", skip_all, fields(?query, ?opts))] - async fn get_events( + fn get_events( &self, query: &FlowKey, opts: GetEventsOpts, ) -> EventStream { - self.inner.get_events(query, opts).await + self.inner.get_events(query, opts) } #[tracing::instrument(level = "debug", skip_all, fields(?query, num_events = events.len()))] @@ -95,7 +95,7 @@ impl EventStore for InMemoryFlowConfigurationEventStore #[async_trait::async_trait] impl 
FlowConfigurationEventStore for InMemoryFlowConfigurationEventStore { #[tracing::instrument(level = "debug", skip_all)] - async fn list_all_dataset_ids(&self) -> FailableDatasetIDStream { + fn list_all_dataset_ids(&self) -> FailableDatasetIDStream { use futures::StreamExt; let dataset_ids = self.inner.as_state().lock().unwrap().dataset_ids.clone(); diff --git a/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_configuration_event_store.rs b/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_configuration_event_store.rs index 792e788a4..fbc14e197 100644 --- a/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_configuration_event_store.rs +++ b/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_configuration_event_store.rs @@ -15,7 +15,8 @@ use kamu_flow_system_inmem::*; database_transactional_test!( storage = inmem, - fixture = kamu_flow_system_repo_tests::test_event_store_empty, + fixture = + kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_empty, harness = InMemoryFlowConfigurationEventStoreHarness ); @@ -23,7 +24,7 @@ database_transactional_test!( database_transactional_test!( storage = inmem, - fixture = kamu_flow_system_repo_tests::test_event_store_get_streams, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_streams, harness = InMemoryFlowConfigurationEventStoreHarness ); @@ -31,7 +32,7 @@ database_transactional_test!( database_transactional_test!( storage = inmem, - fixture = kamu_flow_system_repo_tests::test_event_store_get_events_with_windowing, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_events_with_windowing, harness = InMemoryFlowConfigurationEventStoreHarness ); diff --git a/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_event_store.rs b/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_event_store.rs index 9ae8922ab..d64e1363c 100644 --- a/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_event_store.rs +++ b/src/infra/flow-system/inmem/tests/tests/test_inmem_flow_event_store.rs @@ -7,1172 +7,265 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
-use std::collections::HashSet; -use std::sync::Arc; - -use chrono::{Duration, Utc}; -use futures::TryStreamExt; -use kamu_flow_system::*; +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; use kamu_flow_system_inmem::InMemoryFlowEventStore; -use kamu_task_system::{TaskOutcome, TaskResult, TaskSystemEventStore}; -use kamu_task_system_inmem::InMemoryTaskSystemEventStore; -use opendatafabric::{AccountID, DatasetID}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_empty_filters_distinguish_dataset() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let always_happy_filters = DatasetFlowFilters::default(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - let bar_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - always_happy_filters.clone(), - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - 6, - vec![ - foo_cases.compaction_flow_ids.flow_id_finished, - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ) - .await; - - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &bar_cases, - always_happy_filters.clone(), - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - 6, - vec![ - bar_cases.compaction_flow_ids.flow_id_finished, - bar_cases.compaction_flow_ids.flow_id_running, - bar_cases.compaction_flow_ids.flow_id_waiting, - bar_cases.ingest_flow_ids.flow_id_finished, - bar_cases.ingest_flow_ids.flow_id_running, - bar_cases.ingest_flow_ids.flow_id_waiting, - ], - ) - .await; -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_empty_filters_distingush_dataset, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_filter_by_status() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Waiting), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Running), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_running, - ], - ), - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Finished), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_finished, - ], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + 
fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_status, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_filter_by_flow_type() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - DatasetFlowFilters { - by_flow_type: Some(DatasetFlowType::Ingest), - ..Default::default() - }, - vec![ - foo_cases.ingest_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - ( - DatasetFlowFilters { - by_flow_type: Some(DatasetFlowType::HardCompaction), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_finished, - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.compaction_flow_ids.flow_id_waiting, - ], - ), - ( - DatasetFlowFilters { - by_flow_type: Some(DatasetFlowType::ExecuteTransform), - ..Default::default() - }, - vec![], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_flow_type, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_filter_by_initiator() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let wasya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"wasya")]); - let petya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); - - let cases = vec![ - ( - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::Account(wasya_filter)), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_running, - ], - ), - ( - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::Account(petya_filter)), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - ( - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::System), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_finished, - ], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn 
test_dataset_flow_filter_by_initiator_with_multiple_variants() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let wasya_patya_filter = HashSet::from_iter([ - AccountID::new_seeded_ed25519(b"wasya"), - AccountID::new_seeded_ed25519(b"petya"), - ]); - let mut wasya_patya_unrelated_filter = wasya_patya_filter.clone(); - wasya_patya_unrelated_filter.insert(AccountID::new_seeded_ed25519(b"unrelated_user")); - - let cases = vec![ - ( - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::Account(wasya_patya_filter)), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - // should return the same amount even if some non existing user was provided - ( - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::Account(wasya_patya_unrelated_filter)), - ..Default::default() - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_running, - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator_with_multiple_variants, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_filter_combinations() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - let petya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); - - let cases = vec![ - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Finished), - by_flow_type: Some(DatasetFlowType::Ingest), - by_initiator: Some(InitiatorFilter::System), - }, - vec![foo_cases.ingest_flow_ids.flow_id_finished], - ), - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Waiting), - by_flow_type: Some(DatasetFlowType::HardCompaction), - by_initiator: Some(InitiatorFilter::Account(petya_filter)), - }, - vec![foo_cases.compaction_flow_ids.flow_id_waiting], - ), - ( - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Running), - by_flow_type: Some(DatasetFlowType::Ingest), - by_initiator: Some(InitiatorFilter::System), - }, - vec![], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_combinations, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-#[test_log::test(tokio::test)] -async fn test_dataset_flow_pagination() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - FlowPaginationOpts { - offset: 0, - limit: 2, - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_finished, - foo_cases.compaction_flow_ids.flow_id_running, - ], - ), - ( - FlowPaginationOpts { - offset: 2, - limit: 3, - }, - vec![ - foo_cases.compaction_flow_ids.flow_id_waiting, - foo_cases.ingest_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_running, - ], - ), - ( - FlowPaginationOpts { - offset: 4, - limit: 2, - }, - vec![ - foo_cases.ingest_flow_ids.flow_id_running, - foo_cases.ingest_flow_ids.flow_id_waiting, - ], - ), - ( - FlowPaginationOpts { - offset: 5, - limit: 2, - }, - vec![foo_cases.ingest_flow_ids.flow_id_waiting], - ), - ( - FlowPaginationOpts { - offset: 6, - limit: 5, - }, - vec![], - ), - ]; - - for (pagination, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - Default::default(), - pagination, - 6, - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_dataset_flow_pagination_with_filters() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let foo_cases = - make_dataset_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - FlowPaginationOpts { - offset: 0, - limit: 2, - }, - DatasetFlowFilters { - by_flow_type: Some(DatasetFlowType::Ingest), - ..Default::default() - }, - 3, - vec![ - foo_cases.ingest_flow_ids.flow_id_finished, - foo_cases.ingest_flow_ids.flow_id_running, - ], - ), - ( - FlowPaginationOpts { - offset: 1, - limit: 2, - }, - DatasetFlowFilters { - by_flow_status: Some(FlowStatus::Waiting), - ..Default::default() - }, - 2, - vec![foo_cases.ingest_flow_ids.flow_id_waiting], - ), - ( - FlowPaginationOpts { - offset: 1, - limit: 2, - }, - DatasetFlowFilters { - by_initiator: Some(InitiatorFilter::System), - ..Default::default() - }, - 2, - vec![foo_cases.ingest_flow_ids.flow_id_finished], - ), - ]; - - for (pagination, filters, expected_total_count, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( - flow_event_store.clone(), - &foo_cases, - filters, - pagination, - expected_total_count, - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_and_status, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_unfiltered_system_flows() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - assert_system_flow_expectaitons( - flow_event_store.clone(), - SystemFlowFilters::default(), - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - 3, - vec![ - 
system_case.gc_flow_ids.flow_id_finished, - system_case.gc_flow_ids.flow_id_running, - system_case.gc_flow_ids.flow_id_waiting, - ], - ) - .await; -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_with_pagination, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flows_filtered_by_flow_type() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![( - SystemFlowFilters { - by_flow_type: Some(SystemFlowType::GC), - ..Default::default() - }, - vec![ - system_case.gc_flow_ids.flow_id_finished, - system_case.gc_flow_ids.flow_id_running, - system_case.gc_flow_ids.flow_id_waiting, - ], - )]; - - for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flows_filtered_by_flow_status() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Waiting), - ..Default::default() - }, - vec![system_case.gc_flow_ids.flow_id_waiting], - ), - ( - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Running), - ..Default::default() - }, - vec![system_case.gc_flow_ids.flow_id_running], - ), - ( - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Finished), - ..Default::default() - }, - vec![system_case.gc_flow_ids.flow_id_finished], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination_with_filters, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flows_filtered_by_initiator() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let wasya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"wasya")]); - let unrelated_user_filter = - HashSet::from_iter([AccountID::new_seeded_ed25519(b"unrelated-user")]); - - let cases = vec![ - ( - SystemFlowFilters { - by_initiator: Some(InitiatorFilter::System), - ..Default::default() - }, - vec![system_case.gc_flow_ids.flow_id_finished], - ), - ( - SystemFlowFilters { - by_initiator: 
Some(InitiatorFilter::Account(wasya_filter)), - ..Default::default() - }, - vec![system_case.gc_flow_ids.flow_id_running], - ), - ( - SystemFlowFilters { - by_initiator: Some(InitiatorFilter::Account(unrelated_user_filter)), - ..Default::default() - }, - vec![], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_get_flow_initiators, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flows_complex_filter() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - let petya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); - - let cases = vec![ - ( - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Finished), - by_initiator: Some(InitiatorFilter::System), - by_flow_type: Some(SystemFlowType::GC), - }, - vec![system_case.gc_flow_ids.flow_id_finished], - ), - ( - SystemFlowFilters { - by_initiator: Some(InitiatorFilter::Account(petya_filter)), - by_flow_status: Some(FlowStatus::Waiting), - by_flow_type: None, - }, - vec![system_case.gc_flow_ids.flow_id_waiting], - ), - ( - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Running), - by_initiator: Some(InitiatorFilter::System), - by_flow_type: Some(SystemFlowType::GC), - }, - vec![], - ), - ]; - - for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - filters, - FlowPaginationOpts { - offset: 0, - limit: 100, - }, - expected_flow_ids.len(), - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_unfiltered_system_flows, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flow_pagination() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - FlowPaginationOpts { - offset: 0, - limit: 2, - }, - vec![ - system_case.gc_flow_ids.flow_id_finished, - system_case.gc_flow_ids.flow_id_running, - ], - ), - ( - FlowPaginationOpts { - offset: 1, - limit: 2, - }, - vec![ - system_case.gc_flow_ids.flow_id_running, - system_case.gc_flow_ids.flow_id_waiting, - ], - ), - ( - FlowPaginationOpts { - offset: 2, - limit: 2, - }, - vec![system_case.gc_flow_ids.flow_id_waiting], - ), - ( - FlowPaginationOpts { - offset: 3, - limit: 5, - }, - vec![], - ), - ]; - - for (pagination, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - Default::default(), - pagination, - 3, - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_type, + harness = InMemoryFlowEventStoreHarness +); 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[test_log::test(tokio::test)] -async fn test_system_flow_pagination_with_filters() { - let (flow_event_store, task_event_store) = make_event_stores(); - - let system_case = - make_system_test_case(flow_event_store.clone(), task_event_store.clone()).await; - - let cases = vec![ - ( - FlowPaginationOpts { - offset: 0, - limit: 2, - }, - SystemFlowFilters { - by_flow_type: Some(SystemFlowType::GC), - ..Default::default() - }, - 3, - vec![ - system_case.gc_flow_ids.flow_id_finished, - system_case.gc_flow_ids.flow_id_running, - ], - ), - ( - FlowPaginationOpts { - offset: 0, - limit: 2, - }, - SystemFlowFilters { - by_flow_status: Some(FlowStatus::Waiting), - ..Default::default() - }, - 1, - vec![system_case.gc_flow_ids.flow_id_waiting], - ), - ( - FlowPaginationOpts { - offset: 1, - limit: 2, - }, - SystemFlowFilters { - by_initiator: Some(InitiatorFilter::System), - ..Default::default() - }, - 1, - vec![], - ), - ]; - - for (pagination, filters, expected_total_count, expected_flow_ids) in cases { - assert_system_flow_expectaitons( - flow_event_store.clone(), - filters, - pagination, - expected_total_count, - expected_flow_ids, - ) - .await; - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_status, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -fn make_event_stores() -> (Arc, Arc) { - ( - Arc::new(InMemoryFlowEventStore::new()), - Arc::new(InMemoryTaskSystemEventStore::new()), - ) -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_initiator, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct DatasetTestCase { - dataset_id: DatasetID, - ingest_flow_ids: TestFlowIDs, - compaction_flow_ids: TestFlowIDs, -} - -struct SystemTestCase { - gc_flow_ids: TestFlowIDs, -} - -struct TestFlowIDs { - flow_id_waiting: FlowID, // Initiator: petya - flow_id_running: FlowID, // Initiator: wasya - flow_id_finished: FlowID, // Initiator: system -} - -async fn make_dataset_test_case( - flow_event_store: Arc, - task_event_store: Arc, -) -> DatasetTestCase { - let (_, dataset_id) = DatasetID::new_generated_ed25519(); - - DatasetTestCase { - dataset_id: dataset_id.clone(), - ingest_flow_ids: make_dataset_test_flows( - &dataset_id, - DatasetFlowType::Ingest, - flow_event_store.clone(), - task_event_store.clone(), - ) - .await, - compaction_flow_ids: make_dataset_test_flows( - &dataset_id, - DatasetFlowType::HardCompaction, - flow_event_store, - task_event_store, - ) - .await, - } -} - -async fn make_system_test_case( - flow_event_store: Arc, - task_event_store: Arc, -) -> SystemTestCase { - SystemTestCase { - gc_flow_ids: make_system_test_flows(SystemFlowType::GC, flow_event_store, task_event_store) - .await, - } -} - -async fn make_dataset_test_flows( - dataset_id: &DatasetID, - dataset_flow_type: DatasetFlowType, - flow_event_store: Arc, - task_event_store: Arc, -) -> TestFlowIDs { - let flow_generator = - DatasetFlowGenerator::new(dataset_id, flow_event_store.clone(), task_event_store); - - let wasya_manual_trigger = 
FlowTrigger::Manual(FlowTriggerManual { - trigger_time: Utc::now(), - initiator_account_id: AccountID::new_seeded_ed25519(b"wasya"), - }); - - let petya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { - trigger_time: Utc::now(), - initiator_account_id: AccountID::new_seeded_ed25519(b"petya"), - }); - - let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { - trigger_time: Utc::now(), - }); - - let flow_id_waiting = flow_generator - .make_new_flow( - dataset_flow_type, - FlowStatus::Waiting, - petya_manual_trigger, - None, - ) - .await; - let flow_id_running = flow_generator - .make_new_flow( - dataset_flow_type, - FlowStatus::Running, - wasya_manual_trigger, - None, - ) - .await; - let flow_id_finished = flow_generator - .make_new_flow( - dataset_flow_type, - FlowStatus::Finished, - automatic_trigger, - None, - ) - .await; - - TestFlowIDs { - flow_id_waiting, - flow_id_running, - flow_id_finished, - } -} - -async fn make_system_test_flows( - system_flow_type: SystemFlowType, - flow_event_store: Arc, - task_event_store: Arc, -) -> TestFlowIDs { - let flow_generator = SystemFlowGenerator::new(flow_event_store.clone(), task_event_store); - - let wasya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { - trigger_time: Utc::now(), - initiator_account_id: AccountID::new_seeded_ed25519(b"wasya"), - }); +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_complex_filter, + harness = InMemoryFlowEventStoreHarness +); - let petya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { - trigger_time: Utc::now(), - initiator_account_id: AccountID::new_seeded_ed25519(b"petya"), - }); - - let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { - trigger_time: Utc::now(), - }); - - let flow_id_waiting = flow_generator - .make_new_flow( - system_flow_type, - FlowStatus::Waiting, - petya_manual_trigger, - None, - ) - .await; - let flow_id_running = flow_generator - .make_new_flow( - system_flow_type, - FlowStatus::Running, - wasya_manual_trigger, - None, - ) - .await; - let flow_id_finished = flow_generator - .make_new_flow( - system_flow_type, - FlowStatus::Finished, - automatic_trigger, - None, - ) - .await; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - TestFlowIDs { - flow_id_waiting, - flow_id_running, - flow_id_finished, - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination, + harness = InMemoryFlowEventStoreHarness +); -async fn assert_dataset_flow_expectaitons( - flow_event_store: Arc, - dataset_test_case: &DatasetTestCase, - filters: DatasetFlowFilters, - pagination: FlowPaginationOpts, - expected_total_count: usize, - expected_flow_ids: Vec, -) { - let total_flows_count = flow_event_store - .get_count_flows_by_dataset(&dataset_test_case.dataset_id, &filters) - .await - .unwrap(); - assert_eq!(expected_total_count, total_flows_count); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - let flow_ids: Vec<_> = flow_event_store - .get_all_flow_ids_by_dataset(&dataset_test_case.dataset_id, filters, pagination) - .try_collect() - .await - .unwrap(); - assert_eq!(flow_ids, expected_flow_ids); -} +database_transactional_test!( + storage = inmem, + fixture = 
kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination_with_filters, + harness = InMemoryFlowEventStoreHarness +); -async fn assert_system_flow_expectaitons( - flow_event_store: Arc, - filters: SystemFlowFilters, - pagination: FlowPaginationOpts, - expected_total_count: usize, - expected_flow_ids: Vec, -) { - let total_flows_count = flow_event_store - .get_count_system_flows(&filters) - .await - .unwrap(); - assert_eq!(expected_total_count, total_flows_count); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - let flow_ids: Vec<_> = flow_event_store - .get_all_system_flow_ids(filters, pagination) - .try_collect() - .await - .unwrap(); - assert_eq!(flow_ids, expected_flow_ids); -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_unpaged, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct DatasetFlowGenerator<'a> { - dataset_id: &'a DatasetID, - flow_event_store: Arc, - task_event_store: Arc, -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_pagination, + harness = InMemoryFlowEventStoreHarness +); -impl<'a> DatasetFlowGenerator<'a> { - fn new( - dataset_id: &'a DatasetID, - flow_event_store: Arc, - task_event_store: Arc, - ) -> Self { - Self { - dataset_id, - flow_event_store, - task_event_store, - } - } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - async fn make_new_flow( - &self, - flow_type: DatasetFlowType, - expected_status: FlowStatus, - initial_trigger: FlowTrigger, - config_snapshot: Option, - ) -> FlowID { - let flow_id = self.flow_event_store.new_flow_id(); +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_filters, + harness = InMemoryFlowEventStoreHarness +); - let creation_moment = Utc::now(); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - let mut flow = Flow::new( - creation_moment, - flow_id, - FlowKeyDataset { - dataset_id: self.dataset_id.clone(), - flow_type, - } - .into(), - initial_trigger, - config_snapshot, - ); +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_run_stats, + harness = InMemoryFlowEventStoreHarness +); - drive_flow_to_status(&mut flow, self.task_event_store.as_ref(), expected_status).await; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - flow.save(self.flow_event_store.as_ref()).await.unwrap(); - - flow_id - } -} +database_transactional_test!( + storage = inmem, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_run_stats, + harness = InMemoryFlowEventStoreHarness +); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct SystemFlowGenerator { - flow_event_store: Arc, - task_event_store: Arc, -} - -impl SystemFlowGenerator { - fn new( - flow_event_store: Arc, - task_event_store: Arc, - ) -> Self { - Self { - flow_event_store, - task_event_store, - } - } +database_transactional_test!( + 
storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_single_type_crud, + harness = InMemoryFlowEventStoreHarness +); - async fn make_new_flow( - &self, - flow_type: SystemFlowType, - expected_status: FlowStatus, - initial_trigger: FlowTrigger, - config_snapshot: Option, - ) -> FlowID { - let flow_id = self.flow_event_store.new_flow_id(); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - let creation_moment = Utc::now(); +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_multiple_types_crud, + harness = InMemoryFlowEventStoreHarness +); - let mut flow = Flow::new( - creation_moment, - flow_id, - FlowKey::System(FlowKeySystem { flow_type }), - initial_trigger, - config_snapshot, - ); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - drive_flow_to_status(&mut flow, self.task_event_store.as_ref(), expected_status).await; +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_multiple_datasets_crud, + harness = InMemoryFlowEventStoreHarness +); - flow.save(self.flow_event_store.as_ref()).await.unwrap(); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - flow_id - } -} +database_transactional_test!( + storage = inmem, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_system_flow_crud, + harness = InMemoryFlowEventStoreHarness +); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Harness //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -async fn drive_flow_to_status( - flow: &mut Flow, - task_event_store: &dyn TaskSystemEventStore, - expected_status: FlowStatus, -) { - let start_moment = Utc::now(); - - flow.set_relevant_start_condition( - start_moment + Duration::try_seconds(1).unwrap(), - FlowStartCondition::Schedule(FlowStartConditionSchedule { - wake_up_at: start_moment + Duration::try_minutes(1).unwrap(), - }), - ) - .unwrap(); +struct InMemoryFlowEventStoreHarness { + catalog: Catalog, +} - if expected_status != FlowStatus::Waiting { - let task_id = task_event_store.new_task_id().await.unwrap(); - flow.on_task_scheduled(start_moment + Duration::try_minutes(5).unwrap(), task_id) - .unwrap(); - flow.on_task_running(start_moment + Duration::try_minutes(7).unwrap(), task_id) - .unwrap(); +impl InMemoryFlowEventStoreHarness { + pub fn new() -> Self { + let mut catalog_builder = CatalogBuilder::new(); + catalog_builder.add::<InMemoryFlowEventStore>(); - if expected_status == FlowStatus::Finished { - flow.on_task_finished( - start_moment + Duration::try_minutes(10).unwrap(), - task_id, - TaskOutcome::Success(TaskResult::Empty), - ) - .unwrap(); - } else if expected_status != FlowStatus::Running { - panic!("Not expecting flow status {expected_status:?}"); + Self { + catalog: catalog_builder.build(), } } } diff --git a/src/infra/flow-system/postgres/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json b/src/infra/flow-system/postgres/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json new file mode 100644 index 000000000..ebe7de00d --- /dev/null
+++ b/src/infra/flow-system/postgres/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "\n UPDATE flows\n SET flow_status = $2\n WHERE flow_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-161cde40e2336e520368eddc5f01e1e00b1a5bbad51fafe95800d8398ec9b1a8.json b/src/infra/flow-system/postgres/.sqlx/query-161cde40e2336e520368eddc5f01e1e00b1a5bbad51fafe95800d8398ec9b1a8.json new file mode 100644 index 000000000..52c99b870 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-161cde40e2336e520368eddc5f01e1e00b1a5bbad51fafe95800d8398ec9b1a8.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT attempt.last_event_time as last_attempt_time\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n f.dataset_id = $1 AND\n f.dataset_flow_type = $2\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS attempt\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "last_attempt_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + } + ] + }, + "nullable": [ + false + ] + }, + "hash": "161cde40e2336e520368eddc5f01e1e00b1a5bbad51fafe95800d8398ec9b1a8" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json b/src/infra/flow-system/postgres/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json new file mode 100644 index 000000000..3ad209b7d --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json @@ -0,0 +1,41 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO flows (flow_id, dataset_id, dataset_flow_type, initiator, flow_status)\n VALUES ($1, $2, $3, $4, $5)\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + "Varchar", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + }, + "Varchar", + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-adab77fb8e48b76c19950d600a47ea825d02dde1f41d37181787b0b1d56c7e57.json b/src/infra/flow-system/postgres/.sqlx/query-270be1ac6c19dda611c6a90ac3e4fa387a38f2aa9bf98152049d991feb3fe53e.json similarity index 55% rename from src/infra/flow-system/postgres/.sqlx/query-adab77fb8e48b76c19950d600a47ea825d02dde1f41d37181787b0b1d56c7e57.json rename to src/infra/flow-system/postgres/.sqlx/query-270be1ac6c19dda611c6a90ac3e4fa387a38f2aa9bf98152049d991feb3fe53e.json index 02c9c7364..f60d4de23 100644 --- 
a/src/infra/flow-system/postgres/.sqlx/query-adab77fb8e48b76c19950d600a47ea825d02dde1f41d37181787b0b1d56c7e57.json +++ b/src/infra/flow-system/postgres/.sqlx/query-270be1ac6c19dda611c6a90ac3e4fa387a38f2aa9bf98152049d991feb3fe53e.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT event_id, event_payload\n FROM dataset_flow_configuration_events\n WHERE dataset_id = $1\n AND dataset_flow_type = ($2::text)::dataset_flow_type\n AND (cast($3 as INT8) IS NULL or event_id > $3)\n AND (cast($4 as INT8) IS NULL or event_id <= $4)\n ", + "query": "\n SELECT event_id, event_payload\n FROM flow_configuration_events\n WHERE dataset_id = $1\n AND dataset_flow_type = ($2::text)::dataset_flow_type\n AND (cast($3 as INT8) IS NULL or event_id > $3)\n AND (cast($4 as INT8) IS NULL or event_id <= $4)\n ", "describe": { "columns": [ { @@ -27,5 +27,5 @@ false ] }, - "hash": "adab77fb8e48b76c19950d600a47ea825d02dde1f41d37181787b0b1d56c7e57" + "hash": "270be1ac6c19dda611c6a90ac3e4fa387a38f2aa9bf98152049d991feb3fe53e" } diff --git a/src/infra/flow-system/postgres/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json b/src/infra/flow-system/postgres/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json new file mode 100644 index 000000000..5fbcbb037 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(event_id) AS events_count\n FROM flow_configuration_events\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "events_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-5441453c3b7a5fc0ed93a300a13fac7494c8488385ccc7b1a0ff1d8ab2a2cbbc.json b/src/infra/flow-system/postgres/.sqlx/query-5441453c3b7a5fc0ed93a300a13fac7494c8488385ccc7b1a0ff1d8ab2a2cbbc.json deleted file mode 100644 index a14b68688..000000000 --- a/src/infra/flow-system/postgres/.sqlx/query-5441453c3b7a5fc0ed93a300a13fac7494c8488385ccc7b1a0ff1d8ab2a2cbbc.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT SUM(event_count)::BIGINT as count\n FROM (SELECT COUNT(event_id) as event_count\n FROM dataset_flow_configuration_events\n UNION ALL\n SELECT COUNT(event_id) as event_count\n FROM system_flow_configuration_events) as counts;\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "count", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [] - }, - "nullable": [ - null - ] - }, - "hash": "5441453c3b7a5fc0ed93a300a13fac7494c8488385ccc7b1a0ff1d8ab2a2cbbc" -} diff --git a/src/infra/flow-system/postgres/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json b/src/infra/flow-system/postgres/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json new file mode 100644 index 000000000..6e545139e --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT DISTINCT(initiator) FROM flows\n WHERE dataset_id = $1 AND initiator != $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "initiator", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [ + "Text", + "Text" + ] + 
}, + "nullable": [ + false + ] + }, + "hash": "5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-617464a1636be54a17ae8c7cdb8a328dfb878f37aa1c1f8b3d2e073a12292cae.json b/src/infra/flow-system/postgres/.sqlx/query-617464a1636be54a17ae8c7cdb8a328dfb878f37aa1c1f8b3d2e073a12292cae.json new file mode 100644 index 000000000..7d3763065 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-617464a1636be54a17ae8c7cdb8a328dfb878f37aa1c1f8b3d2e073a12292cae.json @@ -0,0 +1,50 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE dataset_id = $1\n AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2)\n AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3)\n AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4))\n ORDER BY flow_id DESC\n LIMIT $5 OFFSET $6\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + }, + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray", + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "617464a1636be54a17ae8c7cdb8a328dfb878f37aa1c1f8b3d2e073a12292cae" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json b/src/infra/flow-system/postgres/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json new file mode 100644 index 000000000..bb3c4c941 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(event_id) AS events_count\n FROM flow_events\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "events_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json b/src/infra/flow-system/postgres/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json similarity index 50% rename from src/infra/flow-system/postgres/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json rename to src/infra/flow-system/postgres/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json index 2be13044b..625e68d55 100644 --- a/src/infra/flow-system/postgres/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json +++ b/src/infra/flow-system/postgres/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT DISTINCT dataset_id\n FROM dataset_flow_configuration_events\n WHERE event_type = 'FlowConfigurationEventCreated'\n ", + "query": "\n SELECT DISTINCT dataset_id\n FROM flow_configuration_events\n WHERE\n dataset_id IS NOT NULL AND\n event_type = 'FlowConfigurationEventCreated'\n ", "describe": { "columns": [ { @@ -13,8 +13,8 @@ "Left": [] }, "nullable": [ - false + true ] }, - "hash": 
"58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f" + "hash": "68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854" } diff --git a/src/infra/flow-system/postgres/.sqlx/query-81589f523449579cd2055cc5e0fe72949f8fa6bc064fcf8076e13eec01e24cbf.json b/src/infra/flow-system/postgres/.sqlx/query-81589f523449579cd2055cc5e0fe72949f8fa6bc064fcf8076e13eec01e24cbf.json new file mode 100644 index 000000000..4bebe5408 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-81589f523449579cd2055cc5e0fe72949f8fa6bc064fcf8076e13eec01e24cbf.json @@ -0,0 +1,30 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT event_id, event_payload\n FROM flow_events\n WHERE flow_id = $1\n AND (cast($2 as INT8) IS NULL OR event_id > $2)\n AND (cast($3 as INT8) IS NULL OR event_id <= $3)\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "event_id", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "event_payload", + "type_info": "Jsonb" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8", + "Int8" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "81589f523449579cd2055cc5e0fe72949f8fa6bc064fcf8076e13eec01e24cbf" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-81ea87fc3b6d1f313b7fa21098f2bc8a1d7a550eeba5c082fcebbdf5266e9784.json b/src/infra/flow-system/postgres/.sqlx/query-81ea87fc3b6d1f313b7fa21098f2bc8a1d7a550eeba5c082fcebbdf5266e9784.json new file mode 100644 index 000000000..00759bc67 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-81ea87fc3b6d1f313b7fa21098f2bc8a1d7a550eeba5c082fcebbdf5266e9784.json @@ -0,0 +1,44 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(flow_id) AS flows_count\n FROM flows\n WHERE system_flow_type IS NOT NULL\n AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1)\n AND (cast($2 as flow_status_type) IS NULL or flow_status = $2)\n AND (cast($3 as TEXT[]) IS NULL OR initiator = ANY($3))\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flows_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ + "gc" + ] + } + } + }, + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray" + ] + }, + "nullable": [ + null + ] + }, + "hash": "81ea87fc3b6d1f313b7fa21098f2bc8a1d7a550eeba5c082fcebbdf5266e9784" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-82fe203c83a993d993f6850e720d1342fdb7726b2b206e0d494a99bc7c555cd2.json b/src/infra/flow-system/postgres/.sqlx/query-82fe203c83a993d993f6850e720d1342fdb7726b2b206e0d494a99bc7c555cd2.json new file mode 100644 index 000000000..8ee9e2daf --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-82fe203c83a993d993f6850e720d1342fdb7726b2b206e0d494a99bc7c555cd2.json @@ -0,0 +1,31 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT success.last_event_time as last_success_time\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n e.event_payload::json#>'{TaskFinished,task_outcome,Success}' IS NOT NULL AND\n f.system_flow_type = $1\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS success\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "last_success_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ 
+ "gc" + ] + } + } + } + ] + }, + "nullable": [ + false + ] + }, + "hash": "82fe203c83a993d993f6850e720d1342fdb7726b2b206e0d494a99bc7c555cd2" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-849e4e06bc203af6f1b895b839dc64fb200f2a9b93a80a5cc9ab8f7471047639.json b/src/infra/flow-system/postgres/.sqlx/query-849e4e06bc203af6f1b895b839dc64fb200f2a9b93a80a5cc9ab8f7471047639.json new file mode 100644 index 000000000..e77580e5d --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-849e4e06bc203af6f1b895b839dc64fb200f2a9b93a80a5cc9ab8f7471047639.json @@ -0,0 +1,50 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE dataset_id = ANY($1)\n AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2)\n AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3)\n AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4))\n ORDER BY flow_id DESC\n LIMIT $5 OFFSET $6\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "TextArray", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + }, + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray", + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "849e4e06bc203af6f1b895b839dc64fb200f2a9b93a80a5cc9ab8f7471047639" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-8ebfb0c29db1f2996b1c293d2364574404c00d309cb55cdb4667d9fd8be5f7a6.json b/src/infra/flow-system/postgres/.sqlx/query-8ebfb0c29db1f2996b1c293d2364574404c00d309cb55cdb4667d9fd8be5f7a6.json new file mode 100644 index 000000000..a8804b29a --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-8ebfb0c29db1f2996b1c293d2364574404c00d309cb55cdb4667d9fd8be5f7a6.json @@ -0,0 +1,31 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT attempt.last_event_time as last_attempt_time\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n f.system_flow_type = $1\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS attempt\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "last_attempt_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ + "gc" + ] + } + } + } + ] + }, + "nullable": [ + false + ] + }, + "hash": "8ebfb0c29db1f2996b1c293d2364574404c00d309cb55cdb4667d9fd8be5f7a6" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-90af9ad24c0daa8e95f5eddeca57262ffde08dfcc6c3ae60d466c9558dff38b0.json b/src/infra/flow-system/postgres/.sqlx/query-90af9ad24c0daa8e95f5eddeca57262ffde08dfcc6c3ae60d466c9558dff38b0.json new file mode 100644 index 000000000..c27356d99 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-90af9ad24c0daa8e95f5eddeca57262ffde08dfcc6c3ae60d466c9558dff38b0.json @@ -0,0 +1,36 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE\n (cast($1 as flow_status_type) IS NULL or flow_status = $1)\n AND (cast($2 as TEXT[]) IS NULL OR initiator = ANY($2))\n ORDER BY flow_id DESC\n LIMIT $3 OFFSET $4\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": 
"flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray", + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "90af9ad24c0daa8e95f5eddeca57262ffde08dfcc6c3ae60d466c9558dff38b0" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-8221fa671128830eebccbcdc6a26cb7f24c81cccb3d2275654a997de87a3e92f.json b/src/infra/flow-system/postgres/.sqlx/query-9bc951c79e74de1e47ee4151713667ab50dd04d1a47e0475796958d526ad0811.json similarity index 53% rename from src/infra/task-system/postgres/.sqlx/query-8221fa671128830eebccbcdc6a26cb7f24c81cccb3d2275654a997de87a3e92f.json rename to src/infra/flow-system/postgres/.sqlx/query-9bc951c79e74de1e47ee4151713667ab50dd04d1a47e0475796958d526ad0811.json index 7e25407a9..36bf06599 100644 --- a/src/infra/task-system/postgres/.sqlx/query-8221fa671128830eebccbcdc6a26cb7f24c81cccb3d2275654a997de87a3e92f.json +++ b/src/infra/flow-system/postgres/.sqlx/query-9bc951c79e74de1e47ee4151713667ab50dd04d1a47e0475796958d526ad0811.json @@ -1,11 +1,11 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT COUNT(event_id) from task_events\n ", + "query": "\n SELECT nextval('flow_id_seq') AS new_flow_id\n ", "describe": { "columns": [ { "ordinal": 0, - "name": "count", + "name": "new_flow_id", "type_info": "Int8" } ], @@ -16,5 +16,5 @@ null ] }, - "hash": "8221fa671128830eebccbcdc6a26cb7f24c81cccb3d2275654a997de87a3e92f" + "hash": "9bc951c79e74de1e47ee4151713667ab50dd04d1a47e0475796958d526ad0811" } diff --git a/src/infra/flow-system/postgres/.sqlx/query-9cb16215957bf11d3c321b6356fcf04807c7359421f7ffaaa4184ae298faf835.json b/src/infra/flow-system/postgres/.sqlx/query-9cb16215957bf11d3c321b6356fcf04807c7359421f7ffaaa4184ae298faf835.json new file mode 100644 index 000000000..6198ebcd8 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-9cb16215957bf11d3c321b6356fcf04807c7359421f7ffaaa4184ae298faf835.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(flow_id) AS flows_count\n FROM flows\n WHERE\n (cast($1 as flow_status_type) IS NULL or flow_status = $1)\n AND (cast($2 as TEXT[]) IS NULL OR initiator = ANY($2))\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flows_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray" + ] + }, + "nullable": [ + null + ] + }, + "hash": "9cb16215957bf11d3c321b6356fcf04807c7359421f7ffaaa4184ae298faf835" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-a3b4464cf084c07a2376ad71365b76c00ae859ce96bd4fe4c10e2436069d8f2c.json b/src/infra/flow-system/postgres/.sqlx/query-a3b4464cf084c07a2376ad71365b76c00ae859ce96bd4fe4c10e2436069d8f2c.json new file mode 100644 index 000000000..c2a16ba35 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-a3b4464cf084c07a2376ad71365b76c00ae859ce96bd4fe4c10e2436069d8f2c.json @@ -0,0 +1,31 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE system_flow_type = $1 AND\n flow_status != 'finished'::flow_status_type\n ORDER BY flow_id DESC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ + "gc" + ] + } + } + } + ] + }, + "nullable": [ + false + ] + }, + "hash": "a3b4464cf084c07a2376ad71365b76c00ae859ce96bd4fe4c10e2436069d8f2c" +} diff 
--git a/src/infra/flow-system/postgres/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json b/src/infra/flow-system/postgres/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json new file mode 100644 index 000000000..f5799be16 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json @@ -0,0 +1,37 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO flows (flow_id, system_flow_type, initiator, flow_status)\n VALUES ($1, $2, $3, $4)\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ + "gc" + ] + } + } + }, + "Varchar", + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-aec21a4f38c17c1d076b083a4d91e5a4b2ff4f69cf775f7eaa87a1d0dd4a1da3.json b/src/infra/flow-system/postgres/.sqlx/query-aec21a4f38c17c1d076b083a4d91e5a4b2ff4f69cf775f7eaa87a1d0dd4a1da3.json new file mode 100644 index 000000000..d0242ae3e --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-aec21a4f38c17c1d076b083a4d91e5a4b2ff4f69cf775f7eaa87a1d0dd4a1da3.json @@ -0,0 +1,48 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(flow_id) AS flows_count\n FROM flows\n WHERE dataset_id = $1\n AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2)\n AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3)\n AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4))\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flows_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + }, + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray" + ] + }, + "nullable": [ + null + ] + }, + "hash": "aec21a4f38c17c1d076b083a4d91e5a4b2ff4f69cf775f7eaa87a1d0dd4a1da3" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-b098bed8965fe53906ed4b662f1691c5a749664e448642ebdb2e7a820768163e.json b/src/infra/flow-system/postgres/.sqlx/query-b098bed8965fe53906ed4b662f1691c5a749664e448642ebdb2e7a820768163e.json new file mode 100644 index 000000000..208b787c2 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-b098bed8965fe53906ed4b662f1691c5a749664e448642ebdb2e7a820768163e.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT success.last_event_time as last_success_time\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n e.event_payload::json#>'{TaskFinished,task_outcome,Success}' IS NOT NULL AND\n f.dataset_id = $1 AND\n f.dataset_flow_type = $2\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS success\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "last_success_time", + "type_info": "Timestamptz" + } + ], + "parameters": { + "Left": [ + "Text", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + 
} + ] + }, + "nullable": [ + false + ] + }, + "hash": "b098bed8965fe53906ed4b662f1691c5a749664e448642ebdb2e7a820768163e" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-bdf38d360a0aec9f53225347eff79ed183c12845fb7e56dfd61c0741107958de.json b/src/infra/flow-system/postgres/.sqlx/query-bdf38d360a0aec9f53225347eff79ed183c12845fb7e56dfd61c0741107958de.json new file mode 100644 index 000000000..3c66e1127 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-bdf38d360a0aec9f53225347eff79ed183c12845fb7e56dfd61c0741107958de.json @@ -0,0 +1,46 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE system_flow_type IS NOT NULL\n AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1)\n AND (cast($2 as flow_status_type) IS NULL or flow_status = $2)\n AND (cast($3 as TEXT[]) IS NULL OR initiator = ANY($3))\n ORDER BY flow_id DESC\n LIMIT $4 OFFSET $5\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + { + "Custom": { + "name": "system_flow_type", + "kind": { + "Enum": [ + "gc" + ] + } + } + }, + { + "Custom": { + "name": "flow_status_type", + "kind": { + "Enum": [ + "waiting", + "running", + "finished" + ] + } + } + }, + "TextArray", + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "bdf38d360a0aec9f53225347eff79ed183c12845fb7e56dfd61c0741107958de" +} diff --git a/src/infra/flow-system/postgres/.sqlx/query-bd7fa47e13defd9dc6b7b10cfb7b0c2a3383fdae83e2e9cdc46f3121bf10c8b2.json b/src/infra/flow-system/postgres/.sqlx/query-d6e99f6892fe26dad007d61005080963094a4a1e892184b6a7f3a09fb86b64ac.json similarity index 57% rename from src/infra/flow-system/postgres/.sqlx/query-bd7fa47e13defd9dc6b7b10cfb7b0c2a3383fdae83e2e9cdc46f3121bf10c8b2.json rename to src/infra/flow-system/postgres/.sqlx/query-d6e99f6892fe26dad007d61005080963094a4a1e892184b6a7f3a09fb86b64ac.json index 093dadd35..33a8ba03f 100644 --- a/src/infra/flow-system/postgres/.sqlx/query-bd7fa47e13defd9dc6b7b10cfb7b0c2a3383fdae83e2e9cdc46f3121bf10c8b2.json +++ b/src/infra/flow-system/postgres/.sqlx/query-d6e99f6892fe26dad007d61005080963094a4a1e892184b6a7f3a09fb86b64ac.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT event_id, event_payload\n FROM system_flow_configuration_events\n WHERE system_flow_type = ($1::text)::system_flow_type\n AND (cast($2 as INT8) IS NULL or event_id > $2)\n AND (cast($3 as INT8) IS NULL or event_id <= $3)\n ", + "query": "\n SELECT event_id, event_payload\n FROM flow_configuration_events\n WHERE system_flow_type = ($1::text)::system_flow_type\n AND (cast($2 as INT8) IS NULL or event_id > $2)\n AND (cast($3 as INT8) IS NULL or event_id <= $3)\n ", "describe": { "columns": [ { @@ -26,5 +26,5 @@ false ] }, - "hash": "bd7fa47e13defd9dc6b7b10cfb7b0c2a3383fdae83e2e9cdc46f3121bf10c8b2" + "hash": "d6e99f6892fe26dad007d61005080963094a4a1e892184b6a7f3a09fb86b64ac" } diff --git a/src/infra/flow-system/postgres/.sqlx/query-fdf6f1724d5fc1bb65cea61e4ddb6f998ed5a415a564f8534c67a7297468bb69.json b/src/infra/flow-system/postgres/.sqlx/query-fdf6f1724d5fc1bb65cea61e4ddb6f998ed5a415a564f8534c67a7297468bb69.json new file mode 100644 index 000000000..0499fef36 --- /dev/null +++ b/src/infra/flow-system/postgres/.sqlx/query-fdf6f1724d5fc1bb65cea61e4ddb6f998ed5a415a564f8534c67a7297468bb69.json @@ -0,0 +1,35 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT flow_id FROM flows\n WHERE dataset_id = $1 AND\n dataset_flow_type = $2 AND\n flow_status != 
'finished'::flow_status_type\n ORDER BY flow_id DESC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "flow_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text", + { + "Custom": { + "name": "dataset_flow_type", + "kind": { + "Enum": [ + "ingest", + "execute_transform", + "hard_compaction", + "reset" + ] + } + } + } + ] + }, + "nullable": [ + false + ] + }, + "hash": "fdf6f1724d5fc1bb65cea61e4ddb6f998ed5a415a564f8534c67a7297468bb69" +} diff --git a/src/infra/flow-system/postgres/src/lib.rs b/src/infra/flow-system/postgres/src/lib.rs index 7f1427eb1..912dbb21f 100644 --- a/src/infra/flow-system/postgres/src/lib.rs +++ b/src/infra/flow-system/postgres/src/lib.rs @@ -11,5 +11,7 @@ pub use kamu_flow_system as domain; mod postgres_flow_configuration_event_store; +mod postgres_flow_event_store; pub use postgres_flow_configuration_event_store::*; +pub use postgres_flow_event_store::*; diff --git a/src/infra/flow-system/postgres/src/postgres_flow_configuration_event_store.rs b/src/infra/flow-system/postgres/src/postgres_flow_configuration_event_store.rs index 4bc6ffa57..f9d20a385 100644 --- a/src/infra/flow-system/postgres/src/postgres_flow_configuration_event_store.rs +++ b/src/infra/flow-system/postgres/src/postgres_flow_configuration_event_store.rs @@ -29,15 +29,14 @@ impl PostgresFlowConfigurationEventStore { } } - async fn get_system_events( + fn get_system_events( &self, fk_system: FlowKeySystem, maybe_from_id: Option, maybe_to_id: Option, ) -> EventStream { - let mut tr = self.transaction.lock().await; - Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -45,7 +44,7 @@ impl PostgresFlowConfigurationEventStore { let mut query_stream = sqlx::query!( r#" SELECT event_id, event_payload - FROM system_flow_configuration_events + FROM flow_configuration_events WHERE system_flow_type = ($1::text)::system_flow_type AND (cast($2 as INT8) IS NULL or event_id > $2) AND (cast($3 as INT8) IS NULL or event_id <= $3) @@ -68,15 +67,14 @@ impl PostgresFlowConfigurationEventStore { }) } - async fn get_dataset_events( + fn get_dataset_events( &self, fk_dataset: FlowKeyDataset, maybe_from_id: Option, maybe_to_id: Option, ) -> EventStream { - let mut tr = self.transaction.lock().await; - Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -84,7 +82,7 @@ impl PostgresFlowConfigurationEventStore { let mut query_stream = sqlx::query!( r#" SELECT event_id, event_payload - FROM dataset_flow_configuration_events + FROM flow_configuration_events WHERE dataset_id = $1 AND dataset_flow_type = ($2::text)::dataset_flow_type AND (cast($3 as INT8) IS NULL or event_id > $3) @@ -114,7 +112,7 @@ impl PostgresFlowConfigurationEventStore { #[async_trait::async_trait] impl EventStore for PostgresFlowConfigurationEventStore { - async fn get_events( + fn get_events( &self, flow_key: &FlowKey, opts: GetEventsOpts, @@ -125,11 +123,9 @@ impl EventStore for PostgresFlowConfigurationEventStore match flow_key.clone() { FlowKey::Dataset(fk_dataset) => { self.get_dataset_events(fk_dataset, maybe_from_id, maybe_to_id) - .await } FlowKey::System(fk_system) => { self.get_system_events(fk_system, maybe_from_id, maybe_to_id) - .await } } } @@ -150,7 +146,7 @@ impl EventStore for PostgresFlowConfigurationEventStore FlowKey::Dataset(fk_dataset) => { let mut query_builder = QueryBuilder::::new( r#" - INSERT INTO dataset_flow_configuration_events (dataset_id, dataset_flow_type, event_type, event_time, event_payload) + INSERT INTO flow_configuration_events (dataset_id, dataset_flow_type, event_type, event_time, event_payload) "#, ); @@ -167,7 +163,7 @@ impl EventStore for PostgresFlowConfigurationEventStore FlowKey::System(fk_system) => { let mut query_builder = QueryBuilder::::new( r#" - INSERT INTO system_flow_configuration_events (system_flow_type, event_type, event_time, event_payload) + INSERT INTO flow_configuration_events (system_flow_type, event_type, event_time, event_payload) "#, ); @@ -207,19 +203,15 @@ impl EventStore for PostgresFlowConfigurationEventStore // `bignumeric`, which is not suitable for us let result = sqlx::query!( r#" - SELECT SUM(event_count)::BIGINT as count - FROM (SELECT COUNT(event_id) as event_count - FROM dataset_flow_configuration_events - UNION ALL - SELECT COUNT(event_id) as event_count - FROM system_flow_configuration_events) as counts; + SELECT COUNT(event_id) AS events_count + FROM flow_configuration_events "#, ) .fetch_one(connection_mut) .await .int_err()?; - let count = usize::try_from(result.count.unwrap()).int_err()?; + let count = usize::try_from(result.events_count.unwrap()).int_err()?; Ok(count) } @@ -229,21 +221,23 @@ impl EventStore for PostgresFlowConfigurationEventStore #[async_trait::async_trait] impl FlowConfigurationEventStore for PostgresFlowConfigurationEventStore { - async fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_> { - let mut tr = self.transaction.lock().await; - + fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_> { Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; let mut query_stream = sqlx::query!( r#" SELECT DISTINCT dataset_id - FROM dataset_flow_configuration_events - WHERE event_type = 'FlowConfigurationEventCreated' + FROM flow_configuration_events + WHERE + dataset_id IS NOT NULL AND + event_type = 'FlowConfigurationEventCreated' "#, ) .try_map(|event_row| { - DatasetID::from_did_str(event_row.dataset_id.as_str()) + DatasetID::from_did_str(event_row.dataset_id.unwrap().as_str()) .map_err(|e| sqlx::Error::Decode(Box::new(e))) }) .fetch(connection_mut) diff --git a/src/infra/flow-system/postgres/src/postgres_flow_event_store.rs b/src/infra/flow-system/postgres/src/postgres_flow_event_store.rs new file mode 100644 index 000000000..08066eea3 --- /dev/null +++ b/src/infra/flow-system/postgres/src/postgres_flow_event_store.rs @@ -0,0 +1,738 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::collections::HashSet; + +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; +use dill::*; +use futures::TryStreamExt; +use kamu_flow_system::*; +use opendatafabric::{AccountID, DatasetID}; +use sqlx::{FromRow, Postgres, QueryBuilder}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +const SYSTEM_INITIATOR: &str = ""; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct PostgresFlowEventStore { + transaction: TransactionRefT, +} + +#[component(pub)] +#[interface(dyn FlowEventStore)] +impl PostgresFlowEventStore { + pub fn new(transaction: TransactionRef) -> Self { + Self { + transaction: transaction.into(), + } + } + + fn prepare_initiator_filter(by_initiator: &InitiatorFilter) -> Vec { + match by_initiator { + InitiatorFilter::System => vec![SYSTEM_INITIATOR.to_string()], + InitiatorFilter::Account(a) => a.iter().map(ToString::to_string).collect(), + } + } + + async fn save_flow_updates_from_events( + &self, + tr: &mut database_common::TransactionGuard<'_, Postgres>, + events: &[FlowEvent], + ) -> Result<(), SaveEventsError> { + for event in events { + let event_flow_id: i64 = (event.flow_id()).try_into().unwrap(); + + if let FlowEvent::Initiated(e) = &event { + let connection_mut = tr.connection_mut().await?; + let initiator = e + .trigger + .initiator_account_id() + .map_or_else(|| SYSTEM_INITIATOR.to_string(), ToString::to_string); + + match &e.flow_key { + FlowKey::Dataset(fk_dataset) => { + sqlx::query!( + r#" + INSERT INTO flows (flow_id, dataset_id, dataset_flow_type, initiator, flow_status) + VALUES ($1, $2, $3, $4, $5) + "#, + event_flow_id, + fk_dataset.dataset_id.to_string(), + fk_dataset.flow_type as DatasetFlowType, + initiator, + FlowStatus::Waiting as FlowStatus, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + FlowKey::System(fk_system) => { + sqlx::query!( + r#" + INSERT INTO flows (flow_id, system_flow_type, initiator, flow_status) + VALUES ($1, $2, $3, $4) + "#, + event_flow_id, + fk_system.flow_type as SystemFlowType, + initiator, + FlowStatus::Waiting as FlowStatus, + ) + 
.execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + } + /* Existing flow, update status */ + else if let Some(new_status) = event.new_status() { + let connection_mut = tr.connection_mut().await?; + sqlx::query!( + r#" + UPDATE flows + SET flow_status = $2 + WHERE flow_id = $1 + "#, + event_flow_id, + new_status as FlowStatus, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + + Ok(()) + } + + async fn save_events_impl( + &self, + tr: &mut database_common::TransactionGuard<'_, Postgres>, + events: &[FlowEvent], + ) -> Result { + let connection_mut = tr.connection_mut().await?; + + #[derive(FromRow)] + struct ResultRow { + event_id: i64, + } + + let mut query_builder = QueryBuilder::::new( + r#" + INSERT INTO flow_events (flow_id, event_time, event_type, event_payload) + "#, + ); + + query_builder.push_values(events, |mut b, event| { + let event_flow_id: i64 = (event.flow_id()).try_into().unwrap(); + b.push_bind(event_flow_id); + b.push_bind(event.event_time()); + b.push_bind(event.typename()); + b.push_bind(serde_json::to_value(event).unwrap()); + }); + + query_builder.push("RETURNING event_id"); + + let rows = query_builder + .build_query_as::() + .fetch_all(connection_mut) + .await + .int_err()?; + + let last_event_id = rows.last().unwrap().event_id; + Ok(EventID::new(last_event_id)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl EventStore for PostgresFlowEventStore { + fn get_events(&self, flow_id: &FlowID, opts: GetEventsOpts) -> EventStream { + let flow_id: i64 = (*flow_id).try_into().unwrap(); + let maybe_from_id = opts.from.map(EventID::into_inner); + let maybe_to_id = opts.to.map(EventID::into_inner); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT event_id, event_payload + FROM flow_events + WHERE flow_id = $1 + AND (cast($2 as INT8) IS NULL OR event_id > $2) + AND (cast($3 as INT8) IS NULL OR event_id <= $3) + "#, + flow_id, + maybe_from_id, + maybe_to_id, + ).try_map(|event_row| { + let event = serde_json::from_value::(event_row.event_payload) + .map_err(|e| sqlx::Error::Decode(Box::new(e)))?; + + Ok((EventID::new(event_row.event_id), event)) + }) + .fetch(connection_mut) + .map_err(|e| GetEventsError::Internal(e.int_err())); + + while let Some((event_id, event)) = query_stream.try_next().await? 
{ + yield Ok((event_id, event)); + } + }) + } + + async fn save_events( + &self, + _flow_id: &FlowID, + events: Vec, + ) -> Result { + if events.is_empty() { + return Err(SaveEventsError::NothingToSave); + } + + let mut tr = self.transaction.lock().await; + + self.save_flow_updates_from_events(&mut tr, &events).await?; + let last_event_id = self.save_events_impl(&mut tr, &events).await?; + + Ok(last_event_id) + } + + async fn len(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(event_id) AS events_count + FROM flow_events + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.events_count.unwrap()).int_err()?; + Ok(count) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl FlowEventStore for PostgresFlowEventStore { + async fn new_flow_id(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT nextval('flow_id_seq') AS new_flow_id + "# + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let flow_id = result.new_flow_id.unwrap(); + Ok(FlowID::try_from(flow_id).unwrap()) + } + + async fn try_get_pending_flow( + &self, + flow_key: &FlowKey, + ) -> Result, InternalError> { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_flow_id = match flow_key { + FlowKey::Dataset(flow_key_dataset) => sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id = $1 AND + dataset_flow_type = $2 AND + flow_status != 'finished'::flow_status_type + ORDER BY flow_id DESC + LIMIT 1 + "#, + flow_key_dataset.dataset_id.to_string(), + flow_key_dataset.flow_type as DatasetFlowType, + ) + .fetch_optional(connection_mut) + .await + .int_err()? + .map(|r| r.flow_id), + + FlowKey::System(flow_key_system) => sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE system_flow_type = $1 AND + flow_status != 'finished'::flow_status_type + ORDER BY flow_id DESC + LIMIT 1 + "#, + flow_key_system.flow_type as SystemFlowType, + ) + .fetch_optional(connection_mut) + .await + .int_err()? 
+ .map(|r| r.flow_id), + }; + + Ok(maybe_flow_id.map(|id| FlowID::try_from(id).unwrap())) + } + + async fn get_dataset_flow_run_stats( + &self, + dataset_id: &DatasetID, + flow_type: DatasetFlowType, + ) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + let maybe_attempt_result = sqlx::query!( + r#" + SELECT attempt.last_event_time as last_attempt_time + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + f.dataset_id = $1 AND + f.dataset_flow_type = $2 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS attempt + "#, + dataset_id.to_string(), + flow_type as DatasetFlowType + ) + .map(|event_row| event_row.last_attempt_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + let connection_mut = tr.connection_mut().await?; + let maybe_success_result = sqlx::query!( + r#" + SELECT success.last_event_time as last_success_time + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + e.event_payload::json#>'{TaskFinished,task_outcome,Success}' IS NOT NULL AND + f.dataset_id = $1 AND + f.dataset_flow_type = $2 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS success + "#, + dataset_id.to_string(), + flow_type as DatasetFlowType + ) + .map(|event_row| event_row.last_success_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + Ok(FlowRunStats { + last_attempt_time: maybe_attempt_result, + last_success_time: maybe_success_result, + }) + } + + async fn get_system_flow_run_stats( + &self, + flow_type: SystemFlowType, + ) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + let maybe_attempt_result = sqlx::query!( + r#" + SELECT attempt.last_event_time as last_attempt_time + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + f.system_flow_type = $1 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS attempt + "#, + flow_type as SystemFlowType + ) + .map(|event_row| event_row.last_attempt_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + let connection_mut = tr.connection_mut().await?; + let maybe_success_result = sqlx::query!( + r#" + SELECT success.last_event_time as last_success_time + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + e.event_payload::json#>'{TaskFinished,task_outcome,Success}' IS NOT NULL AND + f.system_flow_type = $1 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS success + "#, + flow_type as SystemFlowType + ) + .map(|event_row| event_row.last_success_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + Ok(FlowRunStats { + last_attempt_time: maybe_attempt_result, + last_success_time: maybe_success_result, + }) + } + + fn get_all_flow_ids_by_dataset( + &self, + dataset_id: &DatasetID, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let dataset_id = dataset_id.to_string(); + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let by_flow_type = filters.by_flow_type; + let 
by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id = $1 + AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2) + AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3) + AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4)) + ORDER BY flow_id DESC + LIMIT $5 OFFSET $6 + "#, + dataset_id, + by_flow_type as Option, + by_flow_status as Option, + maybe_initiators as Option>, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), + ).try_map(|event_row| { + let flow_id = event_row.flow_id; + Ok(FlowID::try_from(flow_id).unwrap()) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + async fn get_count_flows_by_dataset( + &self, + dataset_id: &DatasetID, + filters: &DatasetFlowFilters, + ) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let query_result = sqlx::query!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE dataset_id = $1 + AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2) + AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3) + AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4)) + "#, + dataset_id.to_string(), + filters.by_flow_type as Option, + filters.by_flow_status as Option, + maybe_initiators as Option> + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let flows_count = query_result.flows_count.unwrap_or_default(); + Ok(usize::try_from(flows_count).unwrap()) + } + + fn get_all_flow_ids_by_datasets( + &self, + dataset_ids: HashSet, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let dataset_ids: Vec<_> = dataset_ids.iter().map(ToString::to_string).collect(); + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let by_flow_type = filters.by_flow_type; + let by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id = ANY($1) + AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2) + AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3) + AND (cast($4 as TEXT[]) IS NULL OR initiator = ANY($4)) + ORDER BY flow_id DESC + LIMIT $5 OFFSET $6 + "#, + dataset_ids as Vec, + by_flow_type as Option, + by_flow_status as Option, + maybe_initiators as Option>, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), + ).try_map(|event_row| { + let flow_id = event_row.flow_id; + Ok(FlowID::try_from(flow_id).unwrap()) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + fn get_unique_flow_initiator_ids_by_dataset( + &self, + dataset_id: &DatasetID, + ) -> InitiatorIDStream { + let dataset_id = dataset_id.to_string(); + + Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT DISTINCT(initiator) FROM flows + WHERE dataset_id = $1 AND initiator != $2 + "#, + dataset_id, + SYSTEM_INITIATOR, + ).try_map(|event_row| { + Ok(AccountID::from_did_str(&event_row.initiator).unwrap()) + }) + .fetch(connection_mut); + + while let Some(initiator) = query_stream.try_next().await.int_err()? { + yield Ok(initiator); + } + }) + } + + fn get_all_system_flow_ids( + &self, + filters: &SystemFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_type = filters.by_flow_type; + let maybe_by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE system_flow_type IS NOT NULL + AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1) + AND (cast($2 as flow_status_type) IS NULL or flow_status = $2) + AND (cast($3 as TEXT[]) IS NULL OR initiator = ANY($3)) + ORDER BY flow_id DESC + LIMIT $4 OFFSET $5 + "#, + maybe_by_flow_type as Option, + maybe_by_flow_status as Option, + maybe_initiators as Option>, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), + ).try_map(|event_row| { + let flow_id = event_row.flow_id; + Ok(FlowID::try_from(flow_id).unwrap()) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + async fn get_count_system_flows( + &self, + filters: &SystemFlowFilters, + ) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let query_result = sqlx::query!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE system_flow_type IS NOT NULL + AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1) + AND (cast($2 as flow_status_type) IS NULL or flow_status = $2) + AND (cast($3 as TEXT[]) IS NULL OR initiator = ANY($3)) + "#, + filters.by_flow_type as Option, + filters.by_flow_status as Option, + maybe_initiators as Option>, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let flows_count = query_result.flows_count.unwrap_or_default(); + Ok(usize::try_from(flows_count).unwrap()) + } + + fn get_all_flow_ids( + &self, + filters: &AllFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream<'_> { + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE + (cast($1 as flow_status_type) IS NULL or flow_status = $1) + AND (cast($2 as TEXT[]) IS NULL OR initiator = ANY($2)) + ORDER BY flow_id DESC + LIMIT $3 OFFSET $4 + "#, + maybe_by_flow_status as Option, + maybe_initiators as Option>, + i64::try_from(pagination.limit).unwrap(), + i64::try_from(pagination.offset).unwrap(), + ).try_map(|event_row| { + let flow_id = event_row.flow_id; + Ok(FlowID::try_from(flow_id).unwrap()) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + async fn get_count_all_flows(&self, filters: &AllFlowFilters) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_status = filters.by_flow_status; + + let query_result = sqlx::query!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE + (cast($1 as flow_status_type) IS NULL or flow_status = $1) + AND (cast($2 as TEXT[]) IS NULL OR initiator = ANY($2)) + "#, + maybe_by_flow_status as Option, + maybe_initiators as Option>, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let flows_count = query_result.flows_count.unwrap_or_default(); + Ok(usize::try_from(flows_count).unwrap()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/flow-system/postgres/tests/tests/mod.rs b/src/infra/flow-system/postgres/tests/tests/mod.rs index 56cd0c800..c06f5b79e 100644 --- a/src/infra/flow-system/postgres/tests/tests/mod.rs +++ b/src/infra/flow-system/postgres/tests/tests/mod.rs @@ -8,3 +8,4 @@ // by the Apache License, Version 2.0. 
mod test_postgres_flow_configuration_event_store; +mod test_postgres_flow_event_store; diff --git a/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_configuration_event_store.rs b/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_configuration_event_store.rs index 50e2d8e69..59515a33c 100644 --- a/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_configuration_event_store.rs +++ b/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_configuration_event_store.rs @@ -17,7 +17,8 @@ use sqlx::PgPool; database_transactional_test!( storage = postgres, - fixture = kamu_flow_system_repo_tests::test_event_store_empty, + fixture = + kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_empty, harness = PostgresFlowConfigurationEventStoreHarness ); @@ -25,7 +26,7 @@ database_transactional_test!( database_transactional_test!( storage = postgres, - fixture = kamu_flow_system_repo_tests::test_event_store_get_streams, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_streams, harness = PostgresFlowConfigurationEventStoreHarness ); @@ -33,10 +34,12 @@ database_transactional_test!( database_transactional_test!( storage = postgres, - fixture = kamu_flow_system_repo_tests::test_event_store_get_events_with_windowing, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_events_with_windowing, harness = PostgresFlowConfigurationEventStoreHarness ); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Harness //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct PostgresFlowConfigurationEventStoreHarness { diff --git a/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_event_store.rs b/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_event_store.rs new file mode 100644 index 000000000..d5df9a662 --- /dev/null +++ b/src/infra/flow-system/postgres/tests/tests/test_postgres_flow_event_store.rs @@ -0,0 +1,278 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
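+//
+// Descriptive note (summarizing what is visible below): each
+// `database_transactional_test!` invocation in this file binds one of the shared
+// fixtures from `kamu_flow_system_repo_tests::test_flow_event_store` to the
+// Postgres-backed `PostgresFlowEventStoreHarness` defined at the end of the file.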
+ +use database_common::PostgresTransactionManager; +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; +use kamu_flow_system_postgres::PostgresFlowEventStore; +use sqlx::PgPool; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_empty_filters_distingush_dataset, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_status, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_flow_type, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator_with_multiple_variants, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_combinations, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_and_status, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_with_pagination, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination, + harness = 
PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination_with_filters, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_get_flow_initiators, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_unfiltered_system_flows, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_type, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_status, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_initiator, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_complex_filter, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination_with_filters, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_unpaged, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = 
kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_pagination, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_filters, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_run_stats, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_run_stats, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_single_type_crud, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_multiple_types_crud, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_multiple_datasets_crud, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_system_flow_crud, + harness = PostgresFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Harness +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct PostgresFlowEventStoreHarness { + catalog: Catalog, +} + +impl PostgresFlowEventStoreHarness { + pub fn new(pg_pool: PgPool) -> Self { + // Initialize catalog with predefined Postgres pool + let mut catalog_builder = CatalogBuilder::new(); + catalog_builder.add_value(pg_pool); + catalog_builder.add::(); + catalog_builder.add::(); + + Self { + catalog: catalog_builder.build(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/flow-system/repo-tests/Cargo.toml b/src/infra/flow-system/repo-tests/Cargo.toml index 7105ed316..31851f476 100644 --- a/src/infra/flow-system/repo-tests/Cargo.toml +++ b/src/infra/flow-system/repo-tests/Cargo.toml @@ -22,8 +22,10 @@ doctest = false [dependencies] +database-common = { workspace = true } opendatafabric = { workspace = true } 
kamu-flow-system = { workspace = true } +kamu-task-system = { workspace = true } chrono = { version = "0.4", default-features = false } dill = "0.9" diff --git a/src/infra/flow-system/repo-tests/src/lib.rs b/src/infra/flow-system/repo-tests/src/lib.rs index aac6d79d2..ad75e395a 100644 --- a/src/infra/flow-system/repo-tests/src/lib.rs +++ b/src/infra/flow-system/repo-tests/src/lib.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -mod flow_configuration_repository_test_suite; +#![feature(assert_matches)] -pub use flow_configuration_repository_test_suite::*; +pub mod test_flow_configuration_event_store; +pub mod test_flow_event_store; diff --git a/src/infra/flow-system/repo-tests/src/flow_configuration_repository_test_suite.rs b/src/infra/flow-system/repo-tests/src/test_flow_configuration_event_store.rs similarity index 98% rename from src/infra/flow-system/repo-tests/src/flow_configuration_repository_test_suite.rs rename to src/infra/flow-system/repo-tests/src/test_flow_configuration_event_store.rs index a09f20708..60766ce18 100644 --- a/src/infra/flow-system/repo-tests/src/flow_configuration_repository_test_suite.rs +++ b/src/infra/flow-system/repo-tests/src/test_flow_configuration_event_store.rs @@ -30,7 +30,6 @@ pub async fn test_event_store_empty(catalog: &Catalog) { ); let events: Vec<_> = event_store .get_events(&flow_key, GetEventsOpts::default()) - .await .try_collect() .await .unwrap(); @@ -39,7 +38,6 @@ pub async fn test_event_store_empty(catalog: &Catalog) { let dataset_ids: Vec<_> = event_store .list_all_dataset_ids() - .await .try_collect() .await .unwrap(); @@ -128,7 +126,6 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&flow_key_1, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -138,7 +135,6 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&flow_key_2, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -148,7 +144,6 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&flow_key_3, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -158,7 +153,6 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let mut dataset_ids: Vec<_> = event_store .list_all_dataset_ids() - .await .try_collect() .await .unwrap(); @@ -232,7 +226,6 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { to: None, }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -251,7 +244,6 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { )), }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -269,7 +261,6 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { to: Some(EventID::new(latest_event_id.into_inner() - 1)), }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await diff --git a/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs b/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs new file mode 100644 index 000000000..caad3f84b --- /dev/null +++ b/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs @@ -0,0 +1,2128 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
+// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::assert_matches::assert_matches; +use std::collections::HashSet; +use std::sync::Arc; + +use chrono::{Duration, Utc}; +use database_common::PaginationOpts; +use dill::Catalog; +use futures::TryStreamExt; +use kamu_flow_system::*; +use kamu_task_system::{TaskError, TaskID, TaskOutcome, TaskResult}; +use opendatafabric::{AccountID, DatasetID}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_empty_filters_distingush_dataset(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let always_happy_filters = DatasetFlowFilters::default(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let bar_cases = make_dataset_test_case(flow_event_store.clone()).await; + + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + always_happy_filters.clone(), + PaginationOpts { + offset: 0, + limit: 100, + }, + 6, + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ) + .await; + + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &bar_cases, + always_happy_filters.clone(), + PaginationOpts { + offset: 0, + limit: 100, + }, + 6, + vec![ + bar_cases.compaction_flow_ids.flow_id_finished, + bar_cases.compaction_flow_ids.flow_id_running, + bar_cases.compaction_flow_ids.flow_id_waiting, + bar_cases.ingest_flow_ids.flow_id_finished, + bar_cases.ingest_flow_ids.flow_id_running, + bar_cases.ingest_flow_ids.flow_id_waiting, + ], + ) + .await; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_status(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Running), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_running, + ], + ), + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Finished), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_finished, + ], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_flow_type(catalog: &Catalog) { + let 
flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + DatasetFlowFilters { + by_flow_type: Some(DatasetFlowType::Ingest), + ..Default::default() + }, + vec![ + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + DatasetFlowFilters { + by_flow_type: Some(DatasetFlowType::HardCompaction), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + ], + ), + ( + DatasetFlowFilters { + by_flow_type: Some(DatasetFlowType::ExecuteTransform), + ..Default::default() + }, + vec![], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_initiator(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let wasya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"wasya")]); + let petya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); + + let cases = vec![ + ( + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::Account(wasya_filter)), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_running, + ], + ), + ( + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::Account(petya_filter)), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::System), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_finished, + ], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_initiator_with_multiple_variants(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let wasya_patya_filter = HashSet::from_iter([ + AccountID::new_seeded_ed25519(b"wasya"), + AccountID::new_seeded_ed25519(b"petya"), + ]); + let mut wasya_patya_unrelated_filter = wasya_patya_filter.clone(); + wasya_patya_unrelated_filter.insert(AccountID::new_seeded_ed25519(b"unrelated_user")); + + let cases = vec![ + ( + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::Account(wasya_patya_filter)), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + // should return the 
same amount even if some non existing user was provided + ( + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::Account(wasya_patya_unrelated_filter)), + ..Default::default() + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_combinations(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let petya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); + + let cases = vec![ + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Finished), + by_flow_type: Some(DatasetFlowType::Ingest), + by_initiator: Some(InitiatorFilter::System), + }, + vec![foo_cases.ingest_flow_ids.flow_id_finished], + ), + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + by_flow_type: Some(DatasetFlowType::HardCompaction), + by_initiator: Some(InitiatorFilter::Account(petya_filter)), + }, + vec![foo_cases.compaction_flow_ids.flow_id_waiting], + ), + ( + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Running), + by_flow_type: Some(DatasetFlowType::Ingest), + by_initiator: Some(InitiatorFilter::System), + }, + vec![], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_datasets(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let bar_cases = make_dataset_test_case(flow_event_store.clone()).await; + make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + vec![foo_cases.dataset_id.clone()], + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + vec![foo_cases.dataset_id.clone(), bar_cases.dataset_id.clone()], + vec![ + bar_cases.compaction_flow_ids.flow_id_finished, + bar_cases.compaction_flow_ids.flow_id_running, + bar_cases.compaction_flow_ids.flow_id_waiting, + bar_cases.ingest_flow_ids.flow_id_finished, + bar_cases.ingest_flow_ids.flow_id_running, + bar_cases.ingest_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + 
(vec![DatasetID::new_seeded_ed25519(b"wrong")], vec![]), + ]; + + for (dataset_ids, expected_flow_ids) in cases { + assert_multiple_dataset_flow_expectations( + flow_event_store.clone(), + dataset_ids, + DatasetFlowFilters::default(), + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_datasets_and_status(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let bar_cases = make_dataset_test_case(flow_event_store.clone()).await; + make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + vec![foo_cases.dataset_id.clone()], + vec![ + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + vec![foo_cases.dataset_id.clone(), bar_cases.dataset_id.clone()], + vec![ + bar_cases.compaction_flow_ids.flow_id_waiting, + bar_cases.ingest_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + (vec![DatasetID::new_seeded_ed25519(b"wrong")], vec![]), + ]; + + for (dataset_ids, expected_flow_ids) in cases { + assert_multiple_dataset_flow_expectations( + flow_event_store.clone(), + dataset_ids, + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + ..Default::default() + }, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_filter_by_datasets_with_pagination(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let bar_cases = make_dataset_test_case(flow_event_store.clone()).await; + make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + vec![foo_cases.dataset_id.clone()], + vec![ + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + ], + PaginationOpts { + offset: 2, + limit: 3, + }, + ), + ( + vec![foo_cases.dataset_id.clone(), bar_cases.dataset_id.clone()], + vec![ + bar_cases.compaction_flow_ids.flow_id_running, + bar_cases.compaction_flow_ids.flow_id_waiting, + bar_cases.ingest_flow_ids.flow_id_finished, + bar_cases.ingest_flow_ids.flow_id_running, + bar_cases.ingest_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + ], + PaginationOpts { + offset: 1, + limit: 7, + }, + ), + ( + vec![DatasetID::new_seeded_ed25519(b"wrong")], + vec![], + PaginationOpts { + offset: 0, + limit: 100, + }, + ), + ]; + + for (dataset_ids, expected_flow_ids, pagination) in cases { + assert_multiple_dataset_flow_expectations( + flow_event_store.clone(), + dataset_ids, + DatasetFlowFilters::default(), + pagination, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_pagination(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + 
let cases = vec![ + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 2, + limit: 3, + }, + vec![ + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 4, + limit: 2, + }, + vec![ + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + PaginationOpts { + offset: 5, + limit: 2, + }, + vec![foo_cases.ingest_flow_ids.flow_id_waiting], + ), + ( + PaginationOpts { + offset: 6, + limit: 5, + }, + vec![], + ), + ]; + + for (pagination, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + Default::default(), + pagination, + 6, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_pagination_with_filters(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + DatasetFlowFilters { + by_flow_type: Some(DatasetFlowType::Ingest), + ..Default::default() + }, + 3, + vec![ + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 1, + limit: 2, + }, + DatasetFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + ..Default::default() + }, + 2, + vec![foo_cases.ingest_flow_ids.flow_id_waiting], + ), + ( + PaginationOpts { + offset: 1, + limit: 2, + }, + DatasetFlowFilters { + by_initiator: Some(InitiatorFilter::System), + ..Default::default() + }, + 2, + vec![foo_cases.ingest_flow_ids.flow_id_finished], + ), + ]; + + for (pagination, filters, expected_total_count, expected_flow_ids) in cases { + assert_dataset_flow_expectaitons( + flow_event_store.clone(), + &foo_cases, + filters, + pagination, + expected_total_count, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_get_flow_initiators(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let res: HashSet<_> = flow_event_store + .get_unique_flow_initiator_ids_by_dataset(&foo_cases.dataset_id) + .try_collect() + .await + .unwrap(); + assert_eq!( + res, + HashSet::from([ + AccountID::new_seeded_ed25519(b"petya"), + AccountID::new_seeded_ed25519(b"wasya"), + ]) + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_unfiltered_system_flows(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + assert_system_flow_expectaitons( + flow_event_store.clone(), + SystemFlowFilters::default(), + PaginationOpts { + offset: 0, + limit: 100, + }, + 3, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + system_case.gc_flow_ids.flow_id_waiting, + ], + ) + .await; +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flows_filtered_by_flow_type(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![( + SystemFlowFilters { + by_flow_type: Some(SystemFlowType::GC), + ..Default::default() + }, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + system_case.gc_flow_ids.flow_id_waiting, + ], + )]; + + for (filters, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flows_filtered_by_flow_status(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + ..Default::default() + }, + vec![system_case.gc_flow_ids.flow_id_waiting], + ), + ( + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Running), + ..Default::default() + }, + vec![system_case.gc_flow_ids.flow_id_running], + ), + ( + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Finished), + ..Default::default() + }, + vec![system_case.gc_flow_ids.flow_id_finished], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flows_filtered_by_initiator(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let wasya_filter = HashSet::from_iter([AccountID::new_seeded_ed25519(b"wasya")]); + let unrelated_user_filter = + HashSet::from_iter([AccountID::new_seeded_ed25519(b"unrelated-user")]); + + let cases = vec![ + ( + SystemFlowFilters { + by_initiator: Some(InitiatorFilter::System), + ..Default::default() + }, + vec![system_case.gc_flow_ids.flow_id_finished], + ), + ( + SystemFlowFilters { + by_initiator: Some(InitiatorFilter::Account(wasya_filter)), + ..Default::default() + }, + vec![system_case.gc_flow_ids.flow_id_running], + ), + ( + SystemFlowFilters { + by_initiator: Some(InitiatorFilter::Account(unrelated_user_filter)), + ..Default::default() + }, + vec![], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flows_complex_filter(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + let petya_filter = 
HashSet::from_iter([AccountID::new_seeded_ed25519(b"petya")]); + + let cases = vec![ + ( + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Finished), + by_initiator: Some(InitiatorFilter::System), + by_flow_type: Some(SystemFlowType::GC), + }, + vec![system_case.gc_flow_ids.flow_id_finished], + ), + ( + SystemFlowFilters { + by_initiator: Some(InitiatorFilter::Account(petya_filter)), + by_flow_status: Some(FlowStatus::Waiting), + by_flow_type: None, + }, + vec![system_case.gc_flow_ids.flow_id_waiting], + ), + ( + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Running), + by_initiator: Some(InitiatorFilter::System), + by_flow_type: Some(SystemFlowType::GC), + }, + vec![], + ), + ]; + + for (filters, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + filters, + PaginationOpts { + offset: 0, + limit: 100, + }, + expected_flow_ids.len(), + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flow_pagination(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 1, + limit: 2, + }, + vec![ + system_case.gc_flow_ids.flow_id_running, + system_case.gc_flow_ids.flow_id_waiting, + ], + ), + ( + PaginationOpts { + offset: 2, + limit: 2, + }, + vec![system_case.gc_flow_ids.flow_id_waiting], + ), + ( + PaginationOpts { + offset: 3, + limit: 5, + }, + vec![], + ), + ]; + + for (pagination, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + Default::default(), + pagination, + 3, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flow_pagination_with_filters(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + SystemFlowFilters { + by_flow_type: Some(SystemFlowType::GC), + ..Default::default() + }, + 3, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + SystemFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + ..Default::default() + }, + 1, + vec![system_case.gc_flow_ids.flow_id_waiting], + ), + ( + PaginationOpts { + offset: 1, + limit: 2, + }, + SystemFlowFilters { + by_initiator: Some(InitiatorFilter::System), + ..Default::default() + }, + 1, + vec![], + ), + ]; + + for (pagination, filters, expected_total_count, expected_flow_ids) in cases { + assert_system_flow_expectaitons( + flow_event_store.clone(), + filters, + pagination, + expected_total_count, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_all_flows_unpaged(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let system_case = 
make_system_test_case(flow_event_store.clone()).await; + + assert_all_flow_expectaitons( + flow_event_store.clone(), + AllFlowFilters::default(), + PaginationOpts { + offset: 0, + limit: 100, + }, + 9, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + system_case.gc_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ) + .await; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_all_flows_pagination(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + let system_case = make_system_test_case(flow_event_store.clone()).await; + + let cases = vec![ + ( + PaginationOpts { + offset: 0, + limit: 2, + }, + vec![ + system_case.gc_flow_ids.flow_id_finished, + system_case.gc_flow_ids.flow_id_running, + ], + ), + ( + PaginationOpts { + offset: 2, + limit: 2, + }, + vec![ + system_case.gc_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_finished, + ], + ), + ( + PaginationOpts { + offset: 7, + limit: 2, + }, + vec![ + foo_cases.ingest_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ), + ( + PaginationOpts { + offset: 8, + limit: 2, + }, + vec![foo_cases.ingest_flow_ids.flow_id_waiting], + ), + ( + PaginationOpts { + offset: 9, + limit: 1, + }, + vec![], + ), + ]; + + for (pagination, expected_flow_ids) in cases { + assert_all_flow_expectaitons( + flow_event_store.clone(), + AllFlowFilters::default(), + pagination, + 9, + expected_flow_ids, + ) + .await; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_all_flows_filters(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; + + let system_case = make_system_test_case(flow_event_store.clone()).await; + + assert_all_flow_expectaitons( + flow_event_store.clone(), + AllFlowFilters { + by_flow_status: Some(FlowStatus::Waiting), + by_initiator: None, + }, + PaginationOpts { + offset: 0, + limit: 100, + }, + 3, + vec![ + system_case.gc_flow_ids.flow_id_waiting, + foo_cases.compaction_flow_ids.flow_id_waiting, + foo_cases.ingest_flow_ids.flow_id_waiting, + ], + ) + .await; + + assert_all_flow_expectaitons( + flow_event_store.clone(), + AllFlowFilters { + by_flow_status: Some(FlowStatus::Running), + by_initiator: None, + }, + PaginationOpts { + offset: 0, + limit: 100, + }, + 3, + vec![ + system_case.gc_flow_ids.flow_id_running, + foo_cases.compaction_flow_ids.flow_id_running, + foo_cases.ingest_flow_ids.flow_id_running, + ], + ) + .await; + + assert_all_flow_expectaitons( + flow_event_store.clone(), + AllFlowFilters { + by_flow_status: Some(FlowStatus::Finished), + by_initiator: None, + }, + PaginationOpts { + offset: 0, + limit: 100, + }, + 3, + vec![ + system_case.gc_flow_ids.flow_id_finished, + foo_cases.compaction_flow_ids.flow_id_finished, + foo_cases.ingest_flow_ids.flow_id_finished, + ], + ) + .await; +} + 
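// Editor's note (illustrative sketch, not part of the patch): the three assertions above pin
// down how `AllFlowFilters` narrows the combined dataset + system listing by status. For
// context, a caller asking for "everything currently waiting" would pair the count and listing
// calls the same way the all-flows assertion helper later in this file does. The wrapper
// function name is invented here, and the same assumptions as the earlier sketch apply
// (module imports, `usize` pagination fields, a result set smaller than the 100-row limit
// the tests themselves use).

async fn count_and_list_waiting_flows(flow_event_store: Arc<dyn FlowEventStore>) -> Vec<FlowID> {
    let waiting_filters = AllFlowFilters {
        by_flow_status: Some(FlowStatus::Waiting),
        by_initiator: None,
    };

    // Total number of waiting flows across all datasets and the system scope
    let waiting_total = flow_event_store
        .get_count_all_flows(&waiting_filters)
        .await
        .unwrap();

    // Fetch them in one page, using the same generous limit as the tests above
    let waiting_ids: Vec<FlowID> = flow_event_store
        .get_all_flow_ids(
            &waiting_filters,
            PaginationOpts {
                offset: 0,
                limit: 100,
            },
        )
        .try_collect()
        .await
        .unwrap();

    // Count and listing should describe the same set
    assert_eq!(waiting_ids.len(), waiting_total);

    waiting_ids
}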
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +const EMPTY_STATS: FlowRunStats = FlowRunStats { + last_attempt_time: None, + last_success_time: None, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dataset_flow_run_stats(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let (_, dataset_id) = DatasetID::new_generated_ed25519(); + + // No stats initially + let stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Schedule flow + + let flow_generator = DatasetFlowGenerator::new(&dataset_id, flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id = flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + // Stats hasn't changed + + let stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Flow starts running + flow_generator.start_running_flow(flow_id).await; + + // still no change + let stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Flow successeds + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Success(TaskResult::Empty)) + .await; + + // Finally, stats got updated + let stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_matches!( + stats, + FlowRunStats { + last_success_time: Some(success_time), + last_attempt_time: Some(attempt_time) + } if success_time == attempt_time + ); + + // Make another flow of the same type with the same dataset + + let flow_id = flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + // Still storing old stats + let new_stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_eq!(new_stats, stats); + + // Flow starts running + flow_generator.start_running_flow(flow_id).await; + + // Still storing old stats + let new_stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_eq!(new_stats, stats); + + // Now finish the flow with failure + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Failed(TaskError::Empty)) + .await; + + // Stats got updated: success stayed as previously, attempt refreshed + let new_stats = flow_event_store + .get_dataset_flow_run_stats(&dataset_id, DatasetFlowType::Ingest) + .await + .unwrap(); + assert_matches!( + new_stats, + FlowRunStats { + last_success_time: Some(success_time), + last_attempt_time: Some(attempt_time) + } if success_time < attempt_time && success_time == stats.last_attempt_time.unwrap() + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_system_flow_run_stats(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + // No stats initially + let stats = flow_event_store + 
.get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Schedule flow + + let flow_generator = SystemFlowGenerator::new(flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id = flow_generator + .make_new_flow( + SystemFlowType::GC, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + // Stats hasn't changed + + let stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Flow starts running + flow_generator.start_running_flow(flow_id).await; + + // still no change + let stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_eq!(stats, EMPTY_STATS); + + // Flow successeds + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Success(TaskResult::Empty)) + .await; + + // Finally, stats got updated + let stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_matches!( + stats, + FlowRunStats { + last_success_time: Some(success_time), + last_attempt_time: Some(attempt_time) + } if success_time == attempt_time + ); + + // Make another flow of the same type + + let flow_id = flow_generator + .make_new_flow( + SystemFlowType::GC, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + // Still storing old stats + let new_stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_eq!(new_stats, stats); + + // Flow starts running + flow_generator.start_running_flow(flow_id).await; + + // Still storing old stats + let new_stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_eq!(new_stats, stats); + + // Now finish the flow with failure + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Failed(TaskError::Empty)) + .await; + + // Stats got updated: success stayed as previously, attempt refreshed + let new_stats = flow_event_store + .get_system_flow_run_stats(SystemFlowType::GC) + .await + .unwrap(); + assert_matches!( + new_stats, + FlowRunStats { + last_success_time: Some(success_time), + last_attempt_time: Some(attempt_time) + } if success_time < attempt_time && success_time == stats.last_attempt_time.unwrap() + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_pending_flow_dataset_single_type_crud(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let (_, dataset_id) = DatasetID::new_generated_ed25519(); + + let flow_key = FlowKey::dataset(dataset_id.clone(), DatasetFlowType::Ingest); + + // No pending yet + let res = flow_event_store + .try_get_pending_flow(&flow_key) + .await + .unwrap(); + assert!(res.is_none()); + + // Schedule flow + let flow_generator = DatasetFlowGenerator::new(&dataset_id, flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id = flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + // Got pending + let res = flow_event_store + .try_get_pending_flow(&flow_key) + .await + .unwrap(); + assert_eq!(res, Some(flow_id)); + + // Flow starts running + 
flow_generator.start_running_flow(flow_id).await; + + // Got pending + let res = flow_event_store + .try_get_pending_flow(&flow_key) + .await + .unwrap(); + assert_eq!(res, Some(flow_id)); + + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Success(TaskResult::Empty)) + .await; + + // No more pending + let res = flow_event_store + .try_get_pending_flow(&flow_key) + .await + .unwrap(); + assert!(res.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_pending_flow_dataset_multiple_types_crud(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let (_, dataset_id) = DatasetID::new_generated_ed25519(); + + let flow_key_ingest = FlowKey::dataset(dataset_id.clone(), DatasetFlowType::Ingest); + let flow_key_compact = FlowKey::dataset(dataset_id.clone(), DatasetFlowType::HardCompaction); + + // No pending yet + let res = flow_event_store + .try_get_pending_flow(&flow_key_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_compact) + .await + .unwrap(); + assert!(res.is_none()); + + // Schedule flows + let flow_generator = DatasetFlowGenerator::new(&dataset_id, flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id_ingest = flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + let flow_id_compact = flow_generator + .make_new_flow( + DatasetFlowType::HardCompaction, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_ingest) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_ingest)); + let res = flow_event_store + .try_get_pending_flow(&flow_key_compact) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_compact)); + + // Flows start running + flow_generator.start_running_flow(flow_id_ingest).await; + flow_generator.start_running_flow(flow_id_compact).await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_ingest) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_ingest)); + let res = flow_event_store + .try_get_pending_flow(&flow_key_compact) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_compact)); + + // Ingest finishes with success + flow_generator + .finish_running_flow(flow_id_ingest, TaskOutcome::Success(TaskResult::Empty)) + .await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_compact) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_compact)); + + // Compact finishes with failure + flow_generator + .finish_running_flow(flow_id_compact, TaskOutcome::Failed(TaskError::Empty)) + .await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_compact) + .await + .unwrap(); + assert!(res.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_pending_flow_multiple_datasets_crud(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let dataset_foo_id = DatasetID::new_seeded_ed25519(b"foo"); + let 
dataset_bar_id = DatasetID::new_seeded_ed25519(b"bar"); + + let flow_key_foo_ingest = FlowKey::dataset(dataset_foo_id.clone(), DatasetFlowType::Ingest); + let flow_key_bar_ingest = FlowKey::dataset(dataset_bar_id.clone(), DatasetFlowType::Ingest); + let flow_key_foo_compact = + FlowKey::dataset(dataset_foo_id.clone(), DatasetFlowType::HardCompaction); + + // No pending yet + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_bar_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_compact) + .await + .unwrap(); + assert!(res.is_none()); + + // Schedule flows + let foo_flow_generator = DatasetFlowGenerator::new(&dataset_foo_id, flow_event_store.clone()); + let bar_flow_generator = DatasetFlowGenerator::new(&dataset_bar_id, flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id_foo_ingest = foo_flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + let flow_id_bar_ingest = bar_flow_generator + .make_new_flow( + DatasetFlowType::Ingest, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + let flow_id_foo_compacting = foo_flow_generator + .make_new_flow( + DatasetFlowType::HardCompaction, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_ingest) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_foo_ingest)); + let res = flow_event_store + .try_get_pending_flow(&flow_key_bar_ingest) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_bar_ingest)); + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_compact) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_foo_compacting)); + + // Foo flows run & finish + foo_flow_generator + .start_running_flow(flow_id_foo_ingest) + .await; + foo_flow_generator + .start_running_flow(flow_id_foo_compacting) + .await; + foo_flow_generator + .finish_running_flow(flow_id_foo_ingest, TaskOutcome::Success(TaskResult::Empty)) + .await; + foo_flow_generator + .finish_running_flow( + flow_id_foo_compacting, + TaskOutcome::Success(TaskResult::Empty), + ) + .await; + + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_ingest) + .await + .unwrap(); + assert!(res.is_none()); + let res = flow_event_store + .try_get_pending_flow(&flow_key_bar_ingest) + .await + .unwrap(); + assert_eq!(res, Some(flow_id_bar_ingest)); + let res = flow_event_store + .try_get_pending_flow(&flow_key_foo_compact) + .await + .unwrap(); + assert!(res.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_pending_flow_system_flow_crud(catalog: &Catalog) { + let flow_event_store = catalog.get_one::().unwrap(); + + let system_flow_key = FlowKey::system(SystemFlowType::GC); + + // No pending yet + + let res = flow_event_store + .try_get_pending_flow(&system_flow_key) + .await + .unwrap(); + assert!(res.is_none()); + + // Schedule flow + + let flow_generator = SystemFlowGenerator::new(flow_event_store.clone()); + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id = flow_generator + .make_new_flow( + 
SystemFlowType::GC, + FlowStatus::Waiting, + automatic_trigger.clone(), + None, + ) + .await; + + let res = flow_event_store + .try_get_pending_flow(&system_flow_key) + .await + .unwrap(); + assert_eq!(res, Some(flow_id)); + + // Run flow + flow_generator.start_running_flow(flow_id).await; + + let res = flow_event_store + .try_get_pending_flow(&system_flow_key) + .await + .unwrap(); + assert_eq!(res, Some(flow_id)); + + // Finish flow + flow_generator + .finish_running_flow(flow_id, TaskOutcome::Success(TaskResult::Empty)) + .await; + + let res = flow_event_store + .try_get_pending_flow(&system_flow_key) + .await + .unwrap(); + assert!(res.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct DatasetTestCase { + dataset_id: DatasetID, + ingest_flow_ids: TestFlowIDs, + compaction_flow_ids: TestFlowIDs, +} + +struct SystemTestCase { + gc_flow_ids: TestFlowIDs, +} + +struct TestFlowIDs { + flow_id_waiting: FlowID, // Initiator: petya + flow_id_running: FlowID, // Initiator: wasya + flow_id_finished: FlowID, // Initiator: system +} + +async fn make_dataset_test_case(flow_event_store: Arc) -> DatasetTestCase { + let (_, dataset_id) = DatasetID::new_generated_ed25519(); + + DatasetTestCase { + dataset_id: dataset_id.clone(), + ingest_flow_ids: make_dataset_test_flows( + &dataset_id, + DatasetFlowType::Ingest, + flow_event_store.clone(), + ) + .await, + compaction_flow_ids: make_dataset_test_flows( + &dataset_id, + DatasetFlowType::HardCompaction, + flow_event_store, + ) + .await, + } +} + +async fn make_system_test_case(flow_event_store: Arc) -> SystemTestCase { + SystemTestCase { + gc_flow_ids: make_system_test_flows(SystemFlowType::GC, flow_event_store).await, + } +} + +async fn make_dataset_test_flows( + dataset_id: &DatasetID, + dataset_flow_type: DatasetFlowType, + flow_event_store: Arc, +) -> TestFlowIDs { + let flow_generator = DatasetFlowGenerator::new(dataset_id, flow_event_store.clone()); + + let wasya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { + trigger_time: Utc::now(), + initiator_account_id: AccountID::new_seeded_ed25519(b"wasya"), + }); + + let petya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { + trigger_time: Utc::now(), + initiator_account_id: AccountID::new_seeded_ed25519(b"petya"), + }); + + let automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id_waiting = flow_generator + .make_new_flow( + dataset_flow_type, + FlowStatus::Waiting, + petya_manual_trigger, + None, + ) + .await; + let flow_id_running = flow_generator + .make_new_flow( + dataset_flow_type, + FlowStatus::Running, + wasya_manual_trigger, + None, + ) + .await; + let flow_id_finished = flow_generator + .make_new_flow( + dataset_flow_type, + FlowStatus::Finished, + automatic_trigger, + None, + ) + .await; + + TestFlowIDs { + flow_id_waiting, + flow_id_running, + flow_id_finished, + } +} + +async fn make_system_test_flows( + system_flow_type: SystemFlowType, + flow_event_store: Arc, +) -> TestFlowIDs { + let flow_generator = SystemFlowGenerator::new(flow_event_store.clone()); + + let wasya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { + trigger_time: Utc::now(), + initiator_account_id: AccountID::new_seeded_ed25519(b"wasya"), + }); + + let petya_manual_trigger = FlowTrigger::Manual(FlowTriggerManual { + trigger_time: Utc::now(), + initiator_account_id: AccountID::new_seeded_ed25519(b"petya"), + }); + + let 
automatic_trigger = FlowTrigger::AutoPolling(FlowTriggerAutoPolling { + trigger_time: Utc::now(), + }); + + let flow_id_waiting = flow_generator + .make_new_flow( + system_flow_type, + FlowStatus::Waiting, + petya_manual_trigger, + None, + ) + .await; + let flow_id_running = flow_generator + .make_new_flow( + system_flow_type, + FlowStatus::Running, + wasya_manual_trigger, + None, + ) + .await; + let flow_id_finished = flow_generator + .make_new_flow( + system_flow_type, + FlowStatus::Finished, + automatic_trigger, + None, + ) + .await; + + TestFlowIDs { + flow_id_waiting, + flow_id_running, + flow_id_finished, + } +} + +async fn assert_dataset_flow_expectaitons( + flow_event_store: Arc, + dataset_test_case: &DatasetTestCase, + filters: DatasetFlowFilters, + pagination: PaginationOpts, + expected_total_count: usize, + expected_flow_ids: Vec, +) { + let total_flows_count = flow_event_store + .get_count_flows_by_dataset(&dataset_test_case.dataset_id, &filters) + .await + .unwrap(); + assert_eq!(expected_total_count, total_flows_count); + + let flow_ids: Vec<_> = flow_event_store + .get_all_flow_ids_by_dataset(&dataset_test_case.dataset_id, &filters, pagination) + .try_collect() + .await + .unwrap(); + assert_eq!(flow_ids, expected_flow_ids); +} + +async fn assert_multiple_dataset_flow_expectations( + flow_event_store: Arc, + dataset_ids: Vec, + filters: DatasetFlowFilters, + pagination: PaginationOpts, + expected_flow_ids: Vec, +) { + let flow_ids: Vec<_> = flow_event_store + .get_all_flow_ids_by_datasets(HashSet::from_iter(dataset_ids), &filters, pagination) + .try_collect() + .await + .unwrap(); + assert_eq!(flow_ids, expected_flow_ids); +} + +async fn assert_system_flow_expectaitons( + flow_event_store: Arc, + filters: SystemFlowFilters, + pagination: PaginationOpts, + expected_total_count: usize, + expected_flow_ids: Vec, +) { + let total_flows_count = flow_event_store + .get_count_system_flows(&filters) + .await + .unwrap(); + assert_eq!(expected_total_count, total_flows_count); + + let flow_ids: Vec<_> = flow_event_store + .get_all_system_flow_ids(&filters, pagination) + .try_collect() + .await + .unwrap(); + assert_eq!(flow_ids, expected_flow_ids); +} + +async fn assert_all_flow_expectaitons( + flow_event_store: Arc, + filters: AllFlowFilters, + pagination: PaginationOpts, + expected_total_count: usize, + expected_flow_ids: Vec, +) { + let total_flows_count = flow_event_store + .get_count_all_flows(&filters) + .await + .unwrap(); + assert_eq!(expected_total_count, total_flows_count); + + let flow_ids: Vec<_> = flow_event_store + .get_all_flow_ids(&filters, pagination) + .try_collect() + .await + .unwrap(); + assert_eq!(flow_ids, expected_flow_ids); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct DatasetFlowGenerator<'a> { + dataset_id: &'a DatasetID, + flow_event_store: Arc, +} + +impl<'a> DatasetFlowGenerator<'a> { + fn new(dataset_id: &'a DatasetID, flow_event_store: Arc) -> Self { + Self { + dataset_id, + flow_event_store, + } + } + + async fn make_new_flow( + &self, + flow_type: DatasetFlowType, + expected_status: FlowStatus, + initial_trigger: FlowTrigger, + config_snapshot: Option, + ) -> FlowID { + let flow_id = self.flow_event_store.new_flow_id().await.unwrap(); + + let creation_moment = Utc::now(); + + let mut flow = Flow::new( + creation_moment, + flow_id, + FlowKeyDataset { + dataset_id: self.dataset_id.clone(), + flow_type, + } + .into(), + initial_trigger, + config_snapshot, + ); + + 
drive_flow_to_status(&mut flow, expected_status); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + + flow_id + } + + async fn start_running_flow(&self, flow_id: FlowID) { + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .unwrap(); + + assert_eq!(flow.status(), FlowStatus::Waiting); + + drive_flow_to_status(&mut flow, FlowStatus::Running); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + } + + async fn finish_running_flow(&self, flow_id: FlowID, outcome: TaskOutcome) { + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .unwrap(); + + assert_eq!(flow.status(), FlowStatus::Running); + + let flow_id: u64 = flow.flow_id.into(); + + flow.on_task_finished( + flow.timing.running_since.unwrap() + Duration::try_minutes(10).unwrap(), + TaskID::new(flow_id * 2 + 1), + outcome, + ) + .unwrap(); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct SystemFlowGenerator { + flow_event_store: Arc, +} + +impl SystemFlowGenerator { + fn new(flow_event_store: Arc) -> Self { + Self { flow_event_store } + } + + async fn make_new_flow( + &self, + flow_type: SystemFlowType, + expected_status: FlowStatus, + initial_trigger: FlowTrigger, + config_snapshot: Option, + ) -> FlowID { + let flow_id = self.flow_event_store.new_flow_id().await.unwrap(); + + let creation_moment = Utc::now(); + + let mut flow = Flow::new( + creation_moment, + flow_id, + FlowKey::System(FlowKeySystem { flow_type }), + initial_trigger, + config_snapshot, + ); + + drive_flow_to_status(&mut flow, expected_status); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + + flow_id + } + + async fn start_running_flow(&self, flow_id: FlowID) { + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .unwrap(); + + assert_eq!(flow.status(), FlowStatus::Waiting); + + drive_flow_to_status(&mut flow, FlowStatus::Running); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + } + + async fn finish_running_flow(&self, flow_id: FlowID, outcome: TaskOutcome) { + let mut flow = Flow::load(flow_id, self.flow_event_store.as_ref()) + .await + .unwrap(); + + assert_eq!(flow.status(), FlowStatus::Running); + + let flow_id: u64 = flow.flow_id.into(); + + flow.on_task_finished( + flow.timing.running_since.unwrap() + Duration::try_minutes(10).unwrap(), + TaskID::new(flow_id * 2 + 1), + outcome, + ) + .unwrap(); + + flow.save(self.flow_event_store.as_ref()).await.unwrap(); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn drive_flow_to_status(flow: &mut Flow, expected_status: FlowStatus) { + let start_moment = Utc::now(); + + flow.set_relevant_start_condition( + start_moment + Duration::try_seconds(1).unwrap(), + FlowStartCondition::Schedule(FlowStartConditionSchedule { + wake_up_at: start_moment + Duration::try_minutes(1).unwrap(), + }), + ) + .unwrap(); + + if expected_status != FlowStatus::Waiting { + // Derived task id from flow id just to ensure unique values + let flow_id: u64 = flow.flow_id.into(); + let task_id = TaskID::new(flow_id * 2 + 1); + + flow.on_task_scheduled(start_moment + Duration::try_minutes(5).unwrap(), task_id) + .unwrap(); + flow.on_task_running(start_moment + Duration::try_minutes(7).unwrap(), task_id) + .unwrap(); + + if expected_status == FlowStatus::Finished { + 
flow.on_task_finished( + start_moment + Duration::try_minutes(10).unwrap(), + task_id, + TaskOutcome::Success(TaskResult::Empty), + ) + .unwrap(); + } else if expected_status != FlowStatus::Running { + panic!("Not expecting flow status {expected_status:?}"); + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/flow-system/sqlite/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json b/src/infra/flow-system/sqlite/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json new file mode 100644 index 000000000..70a043c3c --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n UPDATE flows\n SET flow_status = $2\n WHERE flow_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 2 + }, + "nullable": [] + }, + "hash": "0160db224abe1296b551d4cabfe77563124bab77494e435de9fa52f92cc71c10" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-0e171da9db328875a9dab79e608b27390c63727c238c2f277ec59814202a5de3.json b/src/infra/flow-system/sqlite/.sqlx/query-0e171da9db328875a9dab79e608b27390c63727c238c2f277ec59814202a5de3.json new file mode 100644 index 000000000..340da7d1e --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-0e171da9db328875a9dab79e608b27390c63727c238c2f277ec59814202a5de3.json @@ -0,0 +1,26 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT event_id, event_payload as \"event_payload: _\"\n FROM flow_configuration_events\n WHERE system_flow_type = $1\n AND (cast($2 as INT8) IS NULL or event_id > $2)\n AND (cast($3 as INT8) IS NULL or event_id <= $3)\n ", + "describe": { + "columns": [ + { + "name": "event_id", + "ordinal": 0, + "type_info": "Integer" + }, + { + "name": "event_payload: _", + "ordinal": 1, + "type_info": "Null" + } + ], + "parameters": { + "Right": 3 + }, + "nullable": [ + false, + false + ] + }, + "hash": "0e171da9db328875a9dab79e608b27390c63727c238c2f277ec59814202a5de3" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-0ee66478537b60b68540a836a6b5d3a3dd122472ef001096d2e0876ebbf9b2c4.json b/src/infra/flow-system/sqlite/.sqlx/query-0ee66478537b60b68540a836a6b5d3a3dd122472ef001096d2e0876ebbf9b2c4.json new file mode 100644 index 000000000..001419693 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-0ee66478537b60b68540a836a6b5d3a3dd122472ef001096d2e0876ebbf9b2c4.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO flow_ids(created_time) VALUES($1) RETURNING flow_id as \"flow_id: _\"\n ", + "describe": { + "columns": [ + { + "name": "flow_id: _", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "0ee66478537b60b68540a836a6b5d3a3dd122472ef001096d2e0876ebbf9b2c4" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json b/src/infra/flow-system/sqlite/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json new file mode 100644 index 000000000..669198b19 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO flows (flow_id, dataset_id, dataset_flow_type, initiator, flow_status)\n VALUES ($1, $2, $3, $4, $5)\n ", + 
"describe": { + "columns": [], + "parameters": { + "Right": 5 + }, + "nullable": [] + }, + "hash": "1b67c502d944b04e15eba5980d87cca25a7d19ff7841cf56f78c133260b2da9b" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-f1b80b1c4ed8bf92f225e4f91b14b3fa4ee0088cf3af415e0bdcfe5f408b5cbc.json b/src/infra/flow-system/sqlite/.sqlx/query-25979568c6cba50fb06efaf49c13d398aeb1aa65be675c00df8881f6bae5bbb5.json similarity index 51% rename from src/infra/flow-system/sqlite/.sqlx/query-f1b80b1c4ed8bf92f225e4f91b14b3fa4ee0088cf3af415e0bdcfe5f408b5cbc.json rename to src/infra/flow-system/sqlite/.sqlx/query-25979568c6cba50fb06efaf49c13d398aeb1aa65be675c00df8881f6bae5bbb5.json index be4d027e3..2ec2802a8 100644 --- a/src/infra/flow-system/sqlite/.sqlx/query-f1b80b1c4ed8bf92f225e4f91b14b3fa4ee0088cf3af415e0bdcfe5f408b5cbc.json +++ b/src/infra/flow-system/sqlite/.sqlx/query-25979568c6cba50fb06efaf49c13d398aeb1aa65be675c00df8881f6bae5bbb5.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n SELECT event_id, event_payload as \"event_payload: _\"\n FROM dataset_flow_configuration_events\n WHERE dataset_id = $1\n AND dataset_flow_type = $2\n AND (cast($3 as INT8) IS NULL or event_id > $3)\n AND (cast($4 as INT8) IS NULL or event_id <= $4)\n ", + "query": "\n SELECT event_id, event_payload as \"event_payload: _\"\n FROM flow_configuration_events\n WHERE dataset_id = $1\n AND dataset_flow_type = $2\n AND (cast($3 as INT8) IS NULL or event_id > $3)\n AND (cast($4 as INT8) IS NULL or event_id <= $4)\n ", "describe": { "columns": [ { @@ -22,5 +22,5 @@ false ] }, - "hash": "f1b80b1c4ed8bf92f225e4f91b14b3fa4ee0088cf3af415e0bdcfe5f408b5cbc" + "hash": "25979568c6cba50fb06efaf49c13d398aeb1aa65be675c00df8881f6bae5bbb5" } diff --git a/src/infra/flow-system/sqlite/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json b/src/infra/flow-system/sqlite/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json new file mode 100644 index 000000000..f28d4c45e --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(event_id) AS events_count\n FROM flow_configuration_events\n ", + "describe": { + "columns": [ + { + "name": "events_count", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "2cc10d49799d593246763ac7f9b4980f8fca6c18bd5eea916ccefd5ce5ca87bc" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-3bbb2036b47443af2876e668212529ac8cbb84a303fee634ee35907bc00b40e3.json b/src/infra/flow-system/sqlite/.sqlx/query-3bbb2036b47443af2876e668212529ac8cbb84a303fee634ee35907bc00b40e3.json new file mode 100644 index 000000000..59e87d882 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-3bbb2036b47443af2876e668212529ac8cbb84a303fee634ee35907bc00b40e3.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT attempt.last_event_time as \"last_event_time: _\"\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n f.dataset_id = $1 AND\n f.dataset_flow_type = $2\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS attempt\n ", + "describe": { + "columns": [ + { + "name": "last_event_time: _", + "ordinal": 0, + "type_info": "Null" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false + ] + }, + 
"hash": "3bbb2036b47443af2876e668212529ac8cbb84a303fee634ee35907bc00b40e3" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-427f644c7f09dc8ba8bea1c8d9702f97af2ca0903ac6a02e5126d07f6d8c9d55.json b/src/infra/flow-system/sqlite/.sqlx/query-427f644c7f09dc8ba8bea1c8d9702f97af2ca0903ac6a02e5126d07f6d8c9d55.json deleted file mode 100644 index 9475b0d50..000000000 --- a/src/infra/flow-system/sqlite/.sqlx/query-427f644c7f09dc8ba8bea1c8d9702f97af2ca0903ac6a02e5126d07f6d8c9d55.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n SELECT COUNT(event_id) as count\n FROM flow_configuration_event\n ", - "describe": { - "columns": [ - { - "name": "count", - "ordinal": 0, - "type_info": "Integer" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - false - ] - }, - "hash": "427f644c7f09dc8ba8bea1c8d9702f97af2ca0903ac6a02e5126d07f6d8c9d55" -} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json b/src/infra/flow-system/sqlite/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json deleted file mode 100644 index 9697be35b..000000000 --- a/src/infra/flow-system/sqlite/.sqlx/query-58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n SELECT DISTINCT dataset_id\n FROM dataset_flow_configuration_events\n WHERE event_type = 'FlowConfigurationEventCreated'\n ", - "describe": { - "columns": [ - { - "name": "dataset_id", - "ordinal": 0, - "type_info": "Text" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - false - ] - }, - "hash": "58a1350c7854ca8f7b249a024b61dca2adef4e2dd583bfe9bdec9693da9b080f" -} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-59df498ffa1f514e2d9bca470ec04a449579df624cc967551071e029427d3a78.json b/src/infra/flow-system/sqlite/.sqlx/query-59df498ffa1f514e2d9bca470ec04a449579df624cc967551071e029427d3a78.json new file mode 100644 index 000000000..9c6493004 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-59df498ffa1f514e2d9bca470ec04a449579df624cc967551071e029427d3a78.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT success.last_event_time as \"last_event_time: _\"\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n e.event_payload ->> '$.TaskFinished.task_outcome.Success' IS NOT NULL AND\n f.dataset_id = $1 AND\n f.dataset_flow_type = $2\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS success\n ", + "describe": { + "columns": [ + { + "name": "last_event_time: _", + "ordinal": 0, + "type_info": "Null" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false + ] + }, + "hash": "59df498ffa1f514e2d9bca470ec04a449579df624cc967551071e029427d3a78" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json b/src/infra/flow-system/sqlite/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json new file mode 100644 index 000000000..3037b3652 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT DISTINCT(initiator) FROM flows\n WHERE dataset_id = $1 AND initiator != $2\n ", + "describe": { + "columns": [ + { + "name": "initiator", + "ordinal": 0, + 
"type_info": "Text" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false + ] + }, + "hash": "5c2dfe02e62486feef2c6302a89e28d8d283f8a3ecb39288e2fb497a21bce430" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-617f09b8030c0f34215089ff40840c24e10cfe4a25677d15c0827e47060d55c6.json b/src/infra/flow-system/sqlite/.sqlx/query-617f09b8030c0f34215089ff40840c24e10cfe4a25677d15c0827e47060d55c6.json new file mode 100644 index 000000000..ac5337c1e --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-617f09b8030c0f34215089ff40840c24e10cfe4a25677d15c0827e47060d55c6.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT flow_id FROM flows\n WHERE system_flow_type = $1 AND\n flow_status != 'finished'\n ORDER BY flow_id DESC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "name": "flow_id", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "617f09b8030c0f34215089ff40840c24e10cfe4a25677d15c0827e47060d55c6" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-64d705352d2aadd9bc52680aa2128430921f7fb6fe4ac1ba4f3cacfebf0cf364.json b/src/infra/flow-system/sqlite/.sqlx/query-64d705352d2aadd9bc52680aa2128430921f7fb6fe4ac1ba4f3cacfebf0cf364.json new file mode 100644 index 000000000..5f0642978 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-64d705352d2aadd9bc52680aa2128430921f7fb6fe4ac1ba4f3cacfebf0cf364.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT flow_id FROM flows\n WHERE dataset_id = $1 AND\n dataset_flow_type = $2 AND\n flow_status != 'finished'\n ORDER BY flow_id DESC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "name": "flow_id", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false + ] + }, + "hash": "64d705352d2aadd9bc52680aa2128430921f7fb6fe4ac1ba4f3cacfebf0cf364" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json b/src/infra/flow-system/sqlite/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json new file mode 100644 index 000000000..a01eb33d8 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(event_id) AS events_count\n FROM flow_events\n ", + "describe": { + "columns": [ + { + "name": "events_count", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "6790bcb0ad8f2be93e2383f53ed9b3a56df2f633ae487465fce85192730d30b9" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json b/src/infra/flow-system/sqlite/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json new file mode 100644 index 000000000..9a20fc6b8 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT DISTINCT dataset_id\n FROM flow_configuration_events\n WHERE\n dataset_id IS NOT NULL AND\n event_type = 'FlowConfigurationEventCreated'\n ", + "describe": { + "columns": [ + { + "name": "dataset_id", + "ordinal": 0, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + true + ] + }, + "hash": 
"68065599ecca2f0e8b0cccce7cea20fdb2fe6e6b01e7dc8080d16dd95ac0b854" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-0fced2f53b4b35c6b07202c89bed9f0153bf0f000c4006a1d5103a6fee499d3a.json b/src/infra/flow-system/sqlite/.sqlx/query-75a9c9e34c11dd13537fe4434ee4670d79ea52c14cb3df8802852dca993506a8.json similarity index 54% rename from src/infra/flow-system/sqlite/.sqlx/query-0fced2f53b4b35c6b07202c89bed9f0153bf0f000c4006a1d5103a6fee499d3a.json rename to src/infra/flow-system/sqlite/.sqlx/query-75a9c9e34c11dd13537fe4434ee4670d79ea52c14cb3df8802852dca993506a8.json index e0c9f1e73..cbf389f98 100644 --- a/src/infra/flow-system/sqlite/.sqlx/query-0fced2f53b4b35c6b07202c89bed9f0153bf0f000c4006a1d5103a6fee499d3a.json +++ b/src/infra/flow-system/sqlite/.sqlx/query-75a9c9e34c11dd13537fe4434ee4670d79ea52c14cb3df8802852dca993506a8.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n SELECT event_id, event_payload as \"event_payload: _\"\n FROM system_flow_configuration_events\n WHERE system_flow_type = $1\n AND (cast($2 as INT8) IS NULL or event_id > $2)\n AND (cast($3 as INT8) IS NULL or event_id <= $3)\n ", + "query": "\n SELECT event_id, event_payload as \"event_payload: _\"\n FROM flow_events\n WHERE flow_id = $1\n AND (cast($2 as INT8) IS NULL OR event_id > $2)\n AND (cast($3 as INT8) IS NULL OR event_id <= $3)\n ", "describe": { "columns": [ { @@ -22,5 +22,5 @@ false ] }, - "hash": "0fced2f53b4b35c6b07202c89bed9f0153bf0f000c4006a1d5103a6fee499d3a" + "hash": "75a9c9e34c11dd13537fe4434ee4670d79ea52c14cb3df8802852dca993506a8" } diff --git a/src/infra/flow-system/sqlite/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json b/src/infra/flow-system/sqlite/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json new file mode 100644 index 000000000..5538c8e64 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO flows (flow_id, system_flow_type, initiator, flow_status)\n VALUES ($1, $2, $3, $4)\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 4 + }, + "nullable": [] + }, + "hash": "ad537419dd9b8ce0fd40974ec67198309bb3c5060b751295a27c57a78fac6a57" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-bb532e48b0b700abb78665cb59caf7770be4f1591a93c52369693abb39adbf63.json b/src/infra/flow-system/sqlite/.sqlx/query-bb532e48b0b700abb78665cb59caf7770be4f1591a93c52369693abb39adbf63.json new file mode 100644 index 000000000..30b206b1f --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-bb532e48b0b700abb78665cb59caf7770be4f1591a93c52369693abb39adbf63.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT attempt.last_event_time as \"last_event_time: _\"\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n f.system_flow_type = $1\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS attempt\n ", + "describe": { + "columns": [ + { + "name": "last_event_time: _", + "ordinal": 0, + "type_info": "Null" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "bb532e48b0b700abb78665cb59caf7770be4f1591a93c52369693abb39adbf63" +} diff --git a/src/infra/flow-system/sqlite/.sqlx/query-eac6c0ffa6d45dbb073fc40d5154722fffc6aafbd08d7c19cf9a85d84635a6e0.json 
b/src/infra/flow-system/sqlite/.sqlx/query-eac6c0ffa6d45dbb073fc40d5154722fffc6aafbd08d7c19cf9a85d84635a6e0.json new file mode 100644 index 000000000..9dc2e2fc9 --- /dev/null +++ b/src/infra/flow-system/sqlite/.sqlx/query-eac6c0ffa6d45dbb073fc40d5154722fffc6aafbd08d7c19cf9a85d84635a6e0.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT success.last_event_time as \"last_event_time: _\"\n FROM (\n SELECT e.event_id as event_id, e.event_time AS last_event_time\n FROM flow_events e\n INNER JOIN flows f ON f.flow_id = e.flow_id\n WHERE\n e.event_type = 'FlowEventTaskFinished' AND\n e.event_payload ->> '$.TaskFinished.task_outcome.Success' IS NOT NULL AND\n f.system_flow_type = $1\n ORDER BY e.event_id DESC\n LIMIT 1\n ) AS success\n ", + "describe": { + "columns": [ + { + "name": "last_event_time: _", + "ordinal": 0, + "type_info": "Null" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "eac6c0ffa6d45dbb073fc40d5154722fffc6aafbd08d7c19cf9a85d84635a6e0" +} diff --git a/src/infra/flow-system/sqlite/src/lib.rs b/src/infra/flow-system/sqlite/src/lib.rs index 8877c5cdf..379f27492 100644 --- a/src/infra/flow-system/sqlite/src/lib.rs +++ b/src/infra/flow-system/sqlite/src/lib.rs @@ -10,6 +10,8 @@ // Re-exports pub use kamu_flow_system as domain; -mod sqlite_flow_system_event_store; +mod sqlite_flow_configuration_event_store; +mod sqlite_flow_event_store; -pub use sqlite_flow_system_event_store::*; +pub use sqlite_flow_configuration_event_store::*; +pub use sqlite_flow_event_store::*; diff --git a/src/infra/flow-system/sqlite/src/sqlite_flow_system_event_store.rs b/src/infra/flow-system/sqlite/src/sqlite_flow_configuration_event_store.rs similarity index 65% rename from src/infra/flow-system/sqlite/src/sqlite_flow_system_event_store.rs rename to src/infra/flow-system/sqlite/src/sqlite_flow_configuration_event_store.rs index 2e699a385..467800e72 100644 --- a/src/infra/flow-system/sqlite/src/sqlite_flow_system_event_store.rs +++ b/src/infra/flow-system/sqlite/src/sqlite_flow_configuration_event_store.rs @@ -30,28 +30,27 @@ struct ReturningEventModel { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct SqliteFlowSystemEventStore { +pub struct SqliteFlowConfigurationEventStore { transaction: TransactionRefT, } #[component(pub)] #[interface(dyn FlowConfigurationEventStore)] -impl SqliteFlowSystemEventStore { +impl SqliteFlowConfigurationEventStore { pub fn new(transaction: TransactionRef) -> Self { Self { transaction: transaction.into(), } } - async fn get_system_events( + fn get_system_events( &self, fk_system: FlowKeySystem, maybe_from_id: Option, maybe_to_id: Option, ) -> EventStream { - let mut tr = self.transaction.lock().await; - Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -60,10 +59,10 @@ impl SqliteFlowSystemEventStore { EventModel, r#" SELECT event_id, event_payload as "event_payload: _" - FROM system_flow_configuration_events - WHERE system_flow_type = $1 - AND (cast($2 as INT8) IS NULL or event_id > $2) - AND (cast($3 as INT8) IS NULL or event_id <= $3) + FROM flow_configuration_events + WHERE system_flow_type = $1 + AND (cast($2 as INT8) IS NULL or event_id > $2) + AND (cast($3 as INT8) IS NULL or event_id <= $3) "#, fk_system.flow_type, maybe_from_id, @@ -84,15 +83,14 @@ impl SqliteFlowSystemEventStore { }) } - async fn get_dataset_events( + fn get_dataset_events( &self, fk_dataset: FlowKeyDataset, maybe_from_id: Option, maybe_to_id: Option, ) -> EventStream { - let mut tr = self.transaction.lock().await; - Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -103,11 +101,11 @@ impl SqliteFlowSystemEventStore { EventModel, r#" SELECT event_id, event_payload as "event_payload: _" - FROM dataset_flow_configuration_events - WHERE dataset_id = $1 - AND dataset_flow_type = $2 - AND (cast($3 as INT8) IS NULL or event_id > $3) - AND (cast($4 as INT8) IS NULL or event_id <= $4) + FROM flow_configuration_events + WHERE dataset_id = $1 + AND dataset_flow_type = $2 + AND (cast($3 as INT8) IS NULL or event_id > $3) + AND (cast($4 as INT8) IS NULL or event_id <= $4) "#, dataset_id, fk_dataset.flow_type, @@ -133,8 +131,8 @@ impl SqliteFlowSystemEventStore { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl EventStore for SqliteFlowSystemEventStore { - async fn get_events( +impl EventStore for SqliteFlowConfigurationEventStore { + fn get_events( &self, flow_key: &FlowKey, opts: GetEventsOpts, @@ -145,11 +143,9 @@ impl EventStore for SqliteFlowSystemEventStore { match flow_key.clone() { FlowKey::Dataset(fk_dataset) => { self.get_dataset_events(fk_dataset, maybe_from_id, maybe_to_id) - .await } FlowKey::System(fk_system) => { self.get_system_events(fk_system, maybe_from_id, maybe_to_id) - .await } } } @@ -166,68 +162,39 @@ impl EventStore for SqliteFlowSystemEventStore { let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; - let flow_configuration_events = { - let mut query_builder = QueryBuilder::::new( - r#" - INSERT INTO flow_configuration_event(created_time) - "#, - ); - - let created_times = vec![Utc::now(); events.len()]; - - query_builder.push_values(created_times, |mut b, created_time| { - b.push_bind(created_time); - }); - - query_builder.push("RETURNING event_id"); - - query_builder - .build_query_as::() - .fetch_all(connection_mut) - .await - .int_err()? 
- }; - - let connection_mut = tr.connection_mut().await?; let mut query_builder = match flow_key { FlowKey::Dataset(fk_dataset) => { let mut query_builder = QueryBuilder::::new( r#" - INSERT INTO dataset_flow_configuration_events (event_id, dataset_id, dataset_flow_type, event_type, event_time, event_payload) + INSERT INTO flow_configuration_events (created_time, dataset_id, dataset_flow_type, event_type, event_time, event_payload) "#, ); - query_builder.push_values( - events.into_iter().zip(flow_configuration_events), - |mut b, (event, ReturningEventModel { event_id })| { - b.push_bind(event_id); - b.push_bind(fk_dataset.dataset_id.to_string()); - b.push_bind(fk_dataset.flow_type); - b.push_bind(event.typename()); - b.push_bind(event.event_time()); - b.push_bind(serde_json::to_value(event).unwrap()); - }, - ); + query_builder.push_values(events.into_iter(), |mut b, event| { + b.push_bind(Utc::now()); + b.push_bind(fk_dataset.dataset_id.to_string()); + b.push_bind(fk_dataset.flow_type); + b.push_bind(event.typename()); + b.push_bind(event.event_time()); + b.push_bind(serde_json::to_value(event).unwrap()); + }); query_builder } FlowKey::System(fk_system) => { let mut query_builder = QueryBuilder::::new( r#" - INSERT INTO system_flow_configuration_events (event_id, system_flow_type, event_type, event_time, event_payload) + INSERT INTO flow_configuration_events (created_time, system_flow_type, event_type, event_time, event_payload) "#, ); - query_builder.push_values( - events.into_iter().zip(flow_configuration_events), - |mut b, (event, ReturningEventModel { event_id })| { - b.push_bind(event_id); - b.push_bind(fk_system.flow_type); - b.push_bind(event.typename()); - b.push_bind(event.event_time()); - b.push_bind(serde_json::to_value(event).unwrap()); - }, - ); + query_builder.push_values(events.into_iter(), |mut b, event| { + b.push_bind(Utc::now()); + b.push_bind(fk_system.flow_type); + b.push_bind(event.typename()); + b.push_bind(event.event_time()); + b.push_bind(serde_json::to_value(event).unwrap()); + }); query_builder } @@ -252,15 +219,15 @@ impl EventStore for SqliteFlowSystemEventStore { let result = sqlx::query!( r#" - SELECT COUNT(event_id) as count - FROM flow_configuration_event + SELECT COUNT(event_id) AS events_count + FROM flow_configuration_events "#, ) .fetch_one(connection_mut) .await .int_err()?; - let count = usize::try_from(result.count).int_err()?; + let count = usize::try_from(result.events_count).int_err()?; Ok(count) } @@ -269,22 +236,23 @@ impl EventStore for SqliteFlowSystemEventStore { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl FlowConfigurationEventStore for SqliteFlowSystemEventStore { - async fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_> { - let mut tr = self.transaction.lock().await; - +impl FlowConfigurationEventStore for SqliteFlowConfigurationEventStore { + fn list_all_dataset_ids(&self) -> FailableDatasetIDStream<'_> { Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; let mut query_stream = sqlx::query!( r#" SELECT DISTINCT dataset_id - FROM dataset_flow_configuration_events - WHERE event_type = 'FlowConfigurationEventCreated' + FROM flow_configuration_events + WHERE + dataset_id IS NOT NULL AND + event_type = 'FlowConfigurationEventCreated' "#, ) .try_map(|event_row| { - DatasetID::from_did_str(event_row.dataset_id.as_str()) + DatasetID::from_did_str(event_row.dataset_id.unwrap().as_str()) .map_err(|e| sqlx::Error::Decode(Box::new(e))) }) .fetch(connection_mut) diff --git a/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs b/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs new file mode 100644 index 000000000..23a525418 --- /dev/null +++ b/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs @@ -0,0 +1,884 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::collections::HashSet; + +use chrono::{DateTime, Utc}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; +use dill::*; +use futures::TryStreamExt; +use kamu_flow_system::*; +use opendatafabric::{AccountID, DatasetID}; +use sqlx::sqlite::SqliteRow; +use sqlx::{FromRow, QueryBuilder, Row, Sqlite}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +const SYSTEM_INITIATOR: &str = ""; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct SqliteFlowEventStore { + transaction: TransactionRefT, +} + +#[component(pub)] +#[interface(dyn FlowEventStore)] +impl SqliteFlowEventStore { + pub fn new(transaction: TransactionRef) -> Self { + Self { + transaction: transaction.into(), + } + } + + fn prepare_initiator_filter(by_initiator: &InitiatorFilter) -> Vec { + match by_initiator { + InitiatorFilter::System => vec![SYSTEM_INITIATOR.to_string()], + InitiatorFilter::Account(a) => a.iter().map(ToString::to_string).collect(), + } + } + + fn generate_placeholders_list(args: &[T], index_offset: usize) -> String { + args.iter() + .enumerate() + .map(|(i, _)| format!("${}", i + index_offset)) + .collect::>() + .join(", ") + } + + async fn save_flow_updates_from_events( + &self, + tr: &mut database_common::TransactionGuard<'_, Sqlite>, + events: &[FlowEvent], + ) -> Result<(), SaveEventsError> { + for event in events { + let event_flow_id: i64 = (event.flow_id()).try_into().unwrap(); + + if let FlowEvent::Initiated(e) = &event { + let connection_mut = tr.connection_mut().await?; + let initiator = e + .trigger + .initiator_account_id() + .map_or_else(|| SYSTEM_INITIATOR.to_string(), ToString::to_string); + + match &e.flow_key { + FlowKey::Dataset(fk_dataset) => { + let dataset_id = fk_dataset.dataset_id.to_string(); + let dataset_flow_type = fk_dataset.flow_type; + + sqlx::query!( + r#" + INSERT INTO flows (flow_id, dataset_id, dataset_flow_type, initiator, flow_status) + VALUES ($1, $2, $3, $4, $5) + "#, + event_flow_id, + dataset_id, + dataset_flow_type, + initiator, + FlowStatus::Waiting, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + 
FlowKey::System(fk_system) => { + let system_flow_type = fk_system.flow_type; + sqlx::query!( + r#" + INSERT INTO flows (flow_id, system_flow_type, initiator, flow_status) + VALUES ($1, $2, $3, $4) + "#, + event_flow_id, + system_flow_type, + initiator, + FlowStatus::Waiting as FlowStatus, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + } + /* Existing flow must have been indexed, update status */ + else if let Some(new_status) = event.new_status() { + let connection_mut = tr.connection_mut().await?; + sqlx::query!( + r#" + UPDATE flows + SET flow_status = $2 + WHERE flow_id = $1 + "#, + event_flow_id, + new_status, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + + Ok(()) + } + + async fn save_events_impl( + &self, + tr: &mut database_common::TransactionGuard<'_, Sqlite>, + events: &[FlowEvent], + ) -> Result { + let connection_mut = tr.connection_mut().await?; + + #[derive(FromRow)] + struct ResultRow { + event_id: i64, + } + + let mut query_builder = QueryBuilder::::new( + r#" + INSERT INTO flow_events (flow_id, event_time, event_type, event_payload) + "#, + ); + + query_builder.push_values(events, |mut b, event| { + let event_flow_id: i64 = (event.flow_id()).try_into().unwrap(); + b.push_bind(event_flow_id); + b.push_bind(event.event_time()); + b.push_bind(event.typename()); + b.push_bind(serde_json::to_value(event).unwrap()); + }); + + query_builder.push("RETURNING event_id"); + + let rows = query_builder + .build_query_as::() + .fetch_all(connection_mut) + .await + .int_err()?; + let last_event_id = rows.last().unwrap().event_id; + Ok(EventID::new(last_event_id)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl EventStore for SqliteFlowEventStore { + fn get_events(&self, flow_id: &FlowID, opts: GetEventsOpts) -> EventStream { + let flow_id: i64 = (*flow_id).try_into().unwrap(); + let maybe_from_id = opts.from.map(EventID::into_inner); + let maybe_to_id = opts.to.map(EventID::into_inner); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr + .connection_mut() + .await?; + + #[derive(Debug, sqlx::FromRow, PartialEq, Eq)] + #[allow(dead_code)] + pub struct EventModel { + pub event_id: i64, + pub event_payload: sqlx::types::JsonValue + } + + let mut query_stream = sqlx::query_as!( + EventModel, + r#" + SELECT event_id, event_payload as "event_payload: _" + FROM flow_events + WHERE flow_id = $1 + AND (cast($2 as INT8) IS NULL OR event_id > $2) + AND (cast($3 as INT8) IS NULL OR event_id <= $3) + "#, + flow_id, + maybe_from_id, + maybe_to_id, + ).try_map(|event_row| { + let event = serde_json::from_value::(event_row.event_payload) + .map_err(|e| sqlx::Error::Decode(Box::new(e)))?; + + Ok((EventID::new(event_row.event_id), event)) + }) + .fetch(connection_mut) + .map_err(|e| GetEventsError::Internal(e.int_err())); + + while let Some((event_id, event)) = query_stream.try_next().await? 
{ + yield Ok((event_id, event)); + } + }) + } + + async fn save_events( + &self, + _flow_id: &FlowID, + events: Vec, + ) -> Result { + if events.is_empty() { + return Err(SaveEventsError::NothingToSave); + } + + let mut tr = self.transaction.lock().await; + + self.save_flow_updates_from_events(&mut tr, &events).await?; + let last_event_id = self.save_events_impl(&mut tr, &events).await?; + + Ok(last_event_id) + } + + async fn len(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(event_id) AS events_count + FROM flow_events + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.events_count).int_err()?; + Ok(count) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, sqlx::FromRow, PartialEq, Eq)] +#[allow(dead_code)] +pub struct NewFlow { + pub flow_id: i64, +} + +#[derive(Debug, sqlx::FromRow, PartialEq, Eq)] +#[allow(dead_code)] +pub struct RunStatsRow { + pub last_event_time: DateTime, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl FlowEventStore for SqliteFlowEventStore { + async fn new_flow_id(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let created_time = Utc::now(); + + let result = sqlx::query_as!( + NewFlow, + r#" + INSERT INTO flow_ids(created_time) VALUES($1) RETURNING flow_id as "flow_id: _" + "#, + created_time + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(FlowID::try_from(result.flow_id).unwrap()) + } + + async fn try_get_pending_flow( + &self, + flow_key: &FlowKey, + ) -> Result, InternalError> { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_flow_id = match flow_key { + FlowKey::Dataset(flow_key_dataset) => { + let dataset_id = flow_key_dataset.dataset_id.to_string(); + let flow_type = flow_key_dataset.flow_type; + + sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id = $1 AND + dataset_flow_type = $2 AND + flow_status != 'finished' + ORDER BY flow_id DESC + LIMIT 1 + "#, + dataset_id, + flow_type, + ) + .fetch_optional(connection_mut) + .await + .int_err()? + .map(|r| r.flow_id) + } + + FlowKey::System(flow_key_system) => { + let flow_type = flow_key_system.flow_type; + + sqlx::query!( + r#" + SELECT flow_id FROM flows + WHERE system_flow_type = $1 AND + flow_status != 'finished' + ORDER BY flow_id DESC + LIMIT 1 + "#, + flow_type, + ) + .fetch_optional(connection_mut) + .await + .int_err()? 
+ .map(|r| r.flow_id) + } + }; + + Ok(maybe_flow_id.map(|id| FlowID::try_from(id).unwrap())) + } + + async fn get_dataset_flow_run_stats( + &self, + dataset_id: &DatasetID, + flow_type: DatasetFlowType, + ) -> Result { + let mut tr = self.transaction.lock().await; + + let dataset_id = dataset_id.to_string(); + + let connection_mut = tr.connection_mut().await?; + let maybe_attempt_result = sqlx::query_as!( + RunStatsRow, + r#" + SELECT attempt.last_event_time as "last_event_time: _" + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + f.dataset_id = $1 AND + f.dataset_flow_type = $2 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS attempt + "#, + dataset_id, + flow_type, + ) + .map(|event_row| event_row.last_event_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + let connection_mut = tr.connection_mut().await?; + let maybe_success_result = sqlx::query_as!( + RunStatsRow, + r#" + SELECT success.last_event_time as "last_event_time: _" + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + e.event_payload ->> '$.TaskFinished.task_outcome.Success' IS NOT NULL AND + f.dataset_id = $1 AND + f.dataset_flow_type = $2 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS success + "#, + dataset_id, + flow_type + ) + .map(|event_row| event_row.last_event_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + Ok(FlowRunStats { + last_attempt_time: maybe_attempt_result, + last_success_time: maybe_success_result, + }) + } + + async fn get_system_flow_run_stats( + &self, + flow_type: SystemFlowType, + ) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + let maybe_attempt_result = sqlx::query_as!( + RunStatsRow, + r#" + SELECT attempt.last_event_time as "last_event_time: _" + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + f.system_flow_type = $1 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS attempt + "#, + flow_type + ) + .map(|event_row| event_row.last_event_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + let connection_mut = tr.connection_mut().await?; + let maybe_success_result = sqlx::query_as!( + RunStatsRow, + r#" + SELECT success.last_event_time as "last_event_time: _" + FROM ( + SELECT e.event_id as event_id, e.event_time AS last_event_time + FROM flow_events e + INNER JOIN flows f ON f.flow_id = e.flow_id + WHERE + e.event_type = 'FlowEventTaskFinished' AND + e.event_payload ->> '$.TaskFinished.task_outcome.Success' IS NOT NULL AND + f.system_flow_type = $1 + ORDER BY e.event_id DESC + LIMIT 1 + ) AS success + "#, + flow_type + ) + .map(|event_row| event_row.last_event_time) + .fetch_optional(connection_mut) + .await + .int_err()?; + + Ok(FlowRunStats { + last_attempt_time: maybe_attempt_result, + last_success_time: maybe_success_result, + }) + } + + fn get_all_flow_ids_by_dataset( + &self, + dataset_id: &DatasetID, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let dataset_id = dataset_id.to_string(); + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_type = 
filters.by_flow_type; + let maybe_by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let query_str = format!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id = $1 + AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2) + AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3) + AND ($4 = 0 OR initiator IN ({})) + ORDER BY flow_id DESC + LIMIT $5 OFFSET $6 + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 7)) + .unwrap_or_default(), + ); + + let mut query = sqlx::query(&query_str) + .bind(dataset_id) + .bind(maybe_by_flow_type) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())) + .bind(i64::try_from(pagination.limit).unwrap()) + .bind(i64::try_from(pagination.offset).unwrap()); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let mut query_stream = query.try_map(|event_row: SqliteRow| { + Ok(FlowID::new(event_row.get(0))) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + async fn get_count_flows_by_dataset( + &self, + dataset_id: &DatasetID, + filters: &DatasetFlowFilters, + ) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let dataset_id = dataset_id.to_string(); + let maybe_filters_by_flow_type = filters.by_flow_type; + let maybe_filters_by_flow_status = filters.by_flow_status; + + let query_str = format!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE dataset_id = $1 + AND (cast($2 as dataset_flow_type) IS NULL OR dataset_flow_type = $2) + AND (cast($3 as flow_status_type) IS NULL OR flow_status = $3) + AND ($4 = 0 OR initiator IN ({})) + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 5)) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(dataset_id) + .bind(maybe_filters_by_flow_type) + .bind(maybe_filters_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let query_result = query.fetch_one(connection_mut).await.int_err()?; + let flows_count: i64 = query_result.get(0); + + Ok(usize::try_from(flows_count).unwrap()) + } + + fn get_all_flow_ids_by_datasets( + &self, + dataset_ids: HashSet, + filters: &DatasetFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let dataset_ids: Vec<_> = dataset_ids.iter().map(ToString::to_string).collect(); + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_type = filters.by_flow_type; + let maybe_by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let query_str = format!( + r#" + SELECT flow_id FROM flows + WHERE dataset_id in ({}) + AND (cast($1 as dataset_flow_type) IS NULL OR dataset_flow_type = $1) + AND (cast($2 as flow_status_type) IS NULL OR flow_status = $2) + AND ($3 = 0 OR initiator in ({})) + ORDER BY flow_id DESC + LIMIT $4 OFFSET $5 + "#, + Self::generate_placeholders_list(&dataset_ids, 6), + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 6 + dataset_ids.len())) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(maybe_by_flow_type) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())) + .bind(i64::try_from(pagination.limit).unwrap()) + .bind(i64::try_from(pagination.offset).unwrap()); + + for dataset_id in dataset_ids { + query = query.bind(dataset_id); + } + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let mut query_stream = query.try_map(|event_row: SqliteRow| { + Ok(FlowID::new(event_row.get(0))) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? { + yield Ok(flow_id); + } + }) + } + + fn get_unique_flow_initiator_ids_by_dataset( + &self, + dataset_id: &DatasetID, + ) -> InitiatorIDStream { + let dataset_id = dataset_id.to_string(); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT DISTINCT(initiator) FROM flows + WHERE dataset_id = $1 AND initiator != $2 + "#, + dataset_id, + SYSTEM_INITIATOR, + ).try_map(|event_row| { + Ok(AccountID::from_did_str(&event_row.initiator).unwrap()) + }) + .fetch(connection_mut); + + while let Some(initiator) = query_stream.try_next().await.int_err()? { + yield Ok(initiator); + } + }) + } + + fn get_all_system_flow_ids( + &self, + filters: &SystemFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream { + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let maybe_by_flow_type = filters.by_flow_type; + let maybe_by_flow_status = filters.by_flow_status; + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let query_str = format!( + r#" + SELECT flow_id FROM flows + WHERE system_flow_type IS NOT NULL + AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1) + AND (cast($2 as flow_status_type) IS NULL OR flow_status = $2) + AND ($3 = 0 OR initiator IN ({})) + ORDER BY flow_id DESC + LIMIT $4 OFFSET $5 + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 6)) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(maybe_by_flow_type) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())) + .bind(i64::try_from(pagination.limit).unwrap()) + .bind(i64::try_from(pagination.offset).unwrap()); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let mut query_stream = query.try_map(|event_row: SqliteRow| { + Ok(FlowID::new(event_row.get(0))) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? 
{ + yield Ok(flow_id); + } + }) + } + + async fn get_count_system_flows( + &self, + filters: &SystemFlowFilters, + ) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_by_flow_type = filters.by_flow_type; + let maybe_by_flow_status = filters.by_flow_status; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let query_str = format!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE system_flow_type IS NOT NULL + AND (cast($1 as system_flow_type) IS NULL OR system_flow_type = $1) + AND (cast($2 as flow_status_type) IS NULL OR flow_status = $2) + AND ($3 = 0 OR initiator IN ({})) + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 4)) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(maybe_by_flow_type) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let query_result = query.fetch_one(connection_mut).await.int_err()?; + let flows_count: i64 = query_result.get(0); + + Ok(usize::try_from(flows_count).unwrap()) + } + + fn get_all_flow_ids( + &self, + filters: &AllFlowFilters, + pagination: PaginationOpts, + ) -> FlowIDStream<'_> { + let maybe_by_flow_status = filters.by_flow_status; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await?; + + let query_str = format!( + r#" + SELECT flow_id FROM flows + WHERE + (cast($1 as flow_status_type) IS NULL OR flow_status = $1) + AND ($2 = 0 OR initiator IN ({})) + ORDER BY flow_id DESC + LIMIT $3 OFFSET $4 + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 5)) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())) + .bind(i64::try_from(pagination.limit).unwrap()) + .bind(i64::try_from(pagination.offset).unwrap()); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let mut query_stream = query.try_map(|event_row: SqliteRow| { + Ok(FlowID::new(event_row.get(0))) + }) + .fetch(connection_mut); + + while let Some(flow_id) = query_stream.try_next().await.int_err()? 
{ + yield Ok(flow_id); + } + }) + } + + async fn get_count_all_flows(&self, filters: &AllFlowFilters) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_by_flow_status = filters.by_flow_status; + + let maybe_initiators = filters + .by_initiator + .as_ref() + .map(Self::prepare_initiator_filter); + + let query_str = format!( + r#" + SELECT COUNT(flow_id) AS flows_count + FROM flows + WHERE + (cast($1 as flow_status_type) IS NULL OR flow_status = $1) + AND ($2 = 0 OR initiator IN ({})) + "#, + maybe_initiators + .as_ref() + .map(|initiators| Self::generate_placeholders_list(initiators, 3)) + .unwrap_or_default() + ); + + let mut query = sqlx::query(&query_str) + .bind(maybe_by_flow_status) + .bind(i32::from(maybe_initiators.is_some())); + + if let Some(initiators) = maybe_initiators { + for initiator in initiators { + query = query.bind(initiator); + } + } + + let query_result = query.fetch_one(connection_mut).await.int_err()?; + let flows_count: i64 = query_result.get(0); + + Ok(usize::try_from(flows_count).unwrap()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/flow-system/sqlite/tests/tests/mod.rs b/src/infra/flow-system/sqlite/tests/tests/mod.rs index e9edb7d9d..bde4d971f 100644 --- a/src/infra/flow-system/sqlite/tests/tests/mod.rs +++ b/src/infra/flow-system/sqlite/tests/tests/mod.rs @@ -8,3 +8,4 @@ // by the Apache License, Version 2.0. mod test_sqlite_flow_configuration_event_store; +mod test_sqlite_flow_event_store; diff --git a/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_configuration_event_store.rs b/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_configuration_event_store.rs index 0a125f41f..6d5f85ebd 100644 --- a/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_configuration_event_store.rs +++ b/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_configuration_event_store.rs @@ -10,14 +10,15 @@ use database_common::SqliteTransactionManager; use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; -use kamu_flow_system_sqlite::SqliteFlowSystemEventStore; +use kamu_flow_system_sqlite::SqliteFlowConfigurationEventStore; use sqlx::SqlitePool; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// database_transactional_test!( storage = sqlite, - fixture = kamu_flow_system_repo_tests::test_event_store_empty, + fixture = + kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_empty, harness = SqliteFlowConfigurationEventStoreHarness ); @@ -25,7 +26,7 @@ database_transactional_test!( database_transactional_test!( storage = sqlite, - fixture = kamu_flow_system_repo_tests::test_event_store_get_streams, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_streams, harness = SqliteFlowConfigurationEventStoreHarness ); @@ -33,7 +34,7 @@ database_transactional_test!( database_transactional_test!( storage = sqlite, - fixture = kamu_flow_system_repo_tests::test_event_store_get_events_with_windowing, + fixture = kamu_flow_system_repo_tests::test_flow_configuration_event_store::test_event_store_get_events_with_windowing, harness = SqliteFlowConfigurationEventStoreHarness ); @@ -49,7 +50,7 @@ impl SqliteFlowConfigurationEventStoreHarness { let mut catalog_builder = CatalogBuilder::new(); 
catalog_builder.add_value(sqlite_pool); catalog_builder.add::(); - catalog_builder.add::(); + catalog_builder.add::(); Self { catalog: catalog_builder.build(), diff --git a/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_event_store.rs b/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_event_store.rs new file mode 100644 index 000000000..220286ca6 --- /dev/null +++ b/src/infra/flow-system/sqlite/tests/tests/test_sqlite_flow_event_store.rs @@ -0,0 +1,278 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::SqliteTransactionManager; +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; +use kamu_flow_system_sqlite::SqliteFlowEventStore; +use sqlx::SqlitePool; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_empty_filters_distingush_dataset, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_status, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_flow_type, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_initiator_with_multiple_variants, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_combinations, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + 
fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_and_status, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_filter_by_datasets_with_pagination, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_pagination_with_filters, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_get_flow_initiators, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_unfiltered_system_flows, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_type, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_flow_status, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_filtered_by_initiator, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flows_complex_filter, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + 
storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_pagination_with_filters, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_unpaged, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_pagination, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_all_flows_filters, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_dataset_flow_run_stats, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_flow_system_repo_tests::test_flow_event_store::test_system_flow_run_stats, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_single_type_crud, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_dataset_multiple_types_crud, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_multiple_datasets_crud, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = + kamu_flow_system_repo_tests::test_flow_event_store::test_pending_flow_system_flow_crud, + harness = SqliteFlowEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Harness +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct SqliteFlowEventStoreHarness { + catalog: Catalog, +} + +impl SqliteFlowEventStoreHarness { + pub fn new(sqlite_pool: SqlitePool) -> Self { + // Initialize catalog with predefined Postgres pool + let mut catalog_builder = CatalogBuilder::new(); 
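+        // The test macro supplies a SQLite connection pool; the lines below register it together with the SQLite transaction manager and the flow event store under test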
+ catalog_builder.add_value(sqlite_pool); + catalog_builder.add::(); + catalog_builder.add::(); + + Self { + catalog: catalog_builder.build(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_consumption_repository.rs b/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_consumption_repository.rs index 5bad0a1bd..c41c712c6 100644 --- a/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_consumption_repository.rs +++ b/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_consumption_repository.rs @@ -44,9 +44,7 @@ impl InMemoryOutboxMessageConsumptionRepository { #[async_trait::async_trait] impl OutboxMessageConsumptionRepository for InMemoryOutboxMessageConsumptionRepository { - async fn list_consumption_boundaries( - &self, - ) -> Result { + fn list_consumption_boundaries(&self) -> OutboxMessageConsumptionBoundariesStream { let boundaries = { let guard = self.state.lock().unwrap(); guard @@ -57,7 +55,7 @@ impl OutboxMessageConsumptionRepository for InMemoryOutboxMessageConsumptionRepo .collect::>() }; - Ok(Box::pin(tokio_stream::iter(boundaries))) + Box::pin(tokio_stream::iter(boundaries)) } async fn find_consumption_boundary( diff --git a/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_repository.rs b/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_repository.rs index 105c83cb1..e5f94cf3f 100644 --- a/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_repository.rs +++ b/src/infra/messaging-outbox/inmem/src/repos/inmem_outbox_message_repository.rs @@ -81,12 +81,12 @@ impl OutboxMessageRepository for InMemoryOutboxMessageRepository { Ok(()) } - async fn get_producer_messages( + fn get_producer_messages( &self, producer_name: &str, above_id: OutboxMessageID, batch_size: usize, - ) -> Result { + ) -> OutboxMessageStream { let messages = { let mut messages = Vec::new(); @@ -105,7 +105,7 @@ impl OutboxMessageRepository for InMemoryOutboxMessageRepository { messages }; - Ok(Box::pin(tokio_stream::iter(messages))) + Box::pin(tokio_stream::iter(messages)) } async fn get_latest_message_ids_by_producer( diff --git a/src/infra/messaging-outbox/postgres/.sqlx/query-6a7bc6be8d4f035137972579bfef2f87019a4ba595fb156f968c235723a6cdaf.json b/src/infra/messaging-outbox/postgres/.sqlx/query-e843298c86a1f0d75bce5f91cb6d8d177b4b994dec17fe9fe8786a9a7a4752ae.json similarity index 78% rename from src/infra/messaging-outbox/postgres/.sqlx/query-6a7bc6be8d4f035137972579bfef2f87019a4ba595fb156f968c235723a6cdaf.json rename to src/infra/messaging-outbox/postgres/.sqlx/query-e843298c86a1f0d75bce5f91cb6d8d177b4b994dec17fe9fe8786a9a7a4752ae.json index 77043f650..34d31812f 100644 --- a/src/infra/messaging-outbox/postgres/.sqlx/query-6a7bc6be8d4f035137972579bfef2f87019a4ba595fb156f968c235723a6cdaf.json +++ b/src/infra/messaging-outbox/postgres/.sqlx/query-e843298c86a1f0d75bce5f91cb6d8d177b4b994dec17fe9fe8786a9a7a4752ae.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n producer_name,\n max(message_id) as max_message_id\n FROM outbox_messages\n GROUP BY producer_name\n ", + "query": "\n SELECT\n producer_name,\n max(message_id) AS max_message_id\n FROM outbox_messages\n GROUP BY producer_name\n ", "describe": { "columns": [ { @@ -22,5 +22,5 @@ null ] }, - "hash": "6a7bc6be8d4f035137972579bfef2f87019a4ba595fb156f968c235723a6cdaf" + "hash": 
"e843298c86a1f0d75bce5f91cb6d8d177b4b994dec17fe9fe8786a9a7a4752ae" } diff --git a/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_consumption_repository.rs b/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_consumption_repository.rs index f9ebb0e73..faff35ed3 100644 --- a/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_consumption_repository.rs +++ b/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_consumption_repository.rs @@ -31,12 +31,9 @@ impl PostgresOutboxMessageConsumptionRepository { #[async_trait::async_trait] impl OutboxMessageConsumptionRepository for PostgresOutboxMessageConsumptionRepository { - async fn list_consumption_boundaries( - &self, - ) -> Result { - let mut tr = self.transaction.lock().await; - - Ok(Box::pin(async_stream::stream! { + fn list_consumption_boundaries(&self) -> OutboxMessageConsumptionBoundariesStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -56,7 +53,7 @@ impl OutboxMessageConsumptionRepository for PostgresOutboxMessageConsumptionRepo while let Some(consumption) = query_stream.try_next().await? { yield Ok(consumption); } - })) + }) } async fn find_consumption_boundary( diff --git a/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_repository.rs b/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_repository.rs index 692fdbd08..b780a3509 100644 --- a/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_repository.rs +++ b/src/infra/messaging-outbox/postgres/src/repos/outbox_postgres_message_repository.rs @@ -53,17 +53,16 @@ impl OutboxMessageRepository for PostgresOutboxMessageRepository { Ok(()) } - async fn get_producer_messages( + fn get_producer_messages( &self, producer_name: &str, above_id: OutboxMessageID, batch_size: usize, - ) -> Result { - let mut tr = self.transaction.lock().await; - + ) -> OutboxMessageStream { let producer_name = producer_name.to_string(); - Ok(Box::pin(async_stream::stream! { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -91,7 +90,7 @@ impl OutboxMessageRepository for PostgresOutboxMessageRepository { while let Some(message) = query_stream.try_next().await? 
{ yield Ok(message); } - })) + }) } async fn get_latest_message_ids_by_producer( @@ -104,7 +103,7 @@ impl OutboxMessageRepository for PostgresOutboxMessageRepository { r#" SELECT producer_name, - max(message_id) as max_message_id + max(message_id) AS max_message_id FROM outbox_messages GROUP BY producer_name "#, diff --git a/src/infra/messaging-outbox/repo-tests/src/outbox_message_consumption_repository_test_suite.rs b/src/infra/messaging-outbox/repo-tests/src/outbox_message_consumption_repository_test_suite.rs index d3a6d0cc8..21be70489 100644 --- a/src/infra/messaging-outbox/repo-tests/src/outbox_message_consumption_repository_test_suite.rs +++ b/src/infra/messaging-outbox/repo-tests/src/outbox_message_consumption_repository_test_suite.rs @@ -230,8 +230,6 @@ async fn read_boundaries( use futures::TryStreamExt; outbox_message_consumption_repo .list_consumption_boundaries() - .await - .unwrap() .try_collect() .await .unwrap() diff --git a/src/infra/messaging-outbox/repo-tests/src/outbox_message_repository_test_suite.rs b/src/infra/messaging-outbox/repo-tests/src/outbox_message_repository_test_suite.rs index 9faaa4c46..d4a45e4ec 100644 --- a/src/infra/messaging-outbox/repo-tests/src/outbox_message_repository_test_suite.rs +++ b/src/infra/messaging-outbox/repo-tests/src/outbox_message_repository_test_suite.rs @@ -109,8 +109,6 @@ pub async fn test_push_many_messages_and_read_parts(catalog: &Catalog) { let messages: Vec<_> = outbox_message_repo .get_producer_messages("A", OutboxMessageID::new(0), 3) - .await - .unwrap() .try_collect() .await .unwrap(); @@ -122,8 +120,6 @@ pub async fn test_push_many_messages_and_read_parts(catalog: &Catalog) { let messages: Vec<_> = outbox_message_repo .get_producer_messages("A", OutboxMessageID::new(5), 4) - .await - .unwrap() .try_collect() .await .unwrap(); @@ -172,8 +168,6 @@ pub async fn test_try_reading_above_max(catalog: &Catalog) { let messages: Vec<_> = outbox_message_repo .get_producer_messages("A", OutboxMessageID::new(5), 3) - .await - .unwrap() .try_collect() .await .unwrap(); @@ -181,8 +175,6 @@ pub async fn test_try_reading_above_max(catalog: &Catalog) { let messages: Vec<_> = outbox_message_repo .get_producer_messages("A", OutboxMessageID::new(3), 6) - .await - .unwrap() .try_collect() .await .unwrap(); diff --git a/src/infra/messaging-outbox/sqlite/.sqlx/query-d1d9fdcec93cbf10079386a1f8aaee30cde75a4a9afcb4c2485825e0fec0eb7b.json b/src/infra/messaging-outbox/sqlite/.sqlx/query-ddc1e8352acde4e016fcba702e93edd26d01ff0ae4926d38936e47ba538f2635.json similarity index 79% rename from src/infra/messaging-outbox/sqlite/.sqlx/query-d1d9fdcec93cbf10079386a1f8aaee30cde75a4a9afcb4c2485825e0fec0eb7b.json rename to src/infra/messaging-outbox/sqlite/.sqlx/query-ddc1e8352acde4e016fcba702e93edd26d01ff0ae4926d38936e47ba538f2635.json index 58df9df99..bf2ca87fc 100644 --- a/src/infra/messaging-outbox/sqlite/.sqlx/query-d1d9fdcec93cbf10079386a1f8aaee30cde75a4a9afcb4c2485825e0fec0eb7b.json +++ b/src/infra/messaging-outbox/sqlite/.sqlx/query-ddc1e8352acde4e016fcba702e93edd26d01ff0ae4926d38936e47ba538f2635.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n SELECT\n producer_name,\n IFNULL(MAX(message_id), 0) as max_message_id\n FROM outbox_messages\n GROUP BY producer_name\n ", + "query": "\n SELECT\n producer_name,\n IFNULL(MAX(message_id), 0) AS max_message_id\n FROM outbox_messages\n GROUP BY producer_name\n ", "describe": { "columns": [ { @@ -22,5 +22,5 @@ false ] }, - "hash": "d1d9fdcec93cbf10079386a1f8aaee30cde75a4a9afcb4c2485825e0fec0eb7b" + "hash": 
"ddc1e8352acde4e016fcba702e93edd26d01ff0ae4926d38936e47ba538f2635" } diff --git a/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_consumption_repository.rs b/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_consumption_repository.rs index f9060c0b7..74324ff4e 100644 --- a/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_consumption_repository.rs +++ b/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_consumption_repository.rs @@ -31,12 +31,9 @@ impl SqliteOutboxMessageConsumptionRepository { #[async_trait::async_trait] impl OutboxMessageConsumptionRepository for SqliteOutboxMessageConsumptionRepository { - async fn list_consumption_boundaries( - &self, - ) -> Result { - let mut tr = self.transaction.lock().await; - - Ok(Box::pin(async_stream::stream! { + fn list_consumption_boundaries(&self) -> OutboxMessageConsumptionBoundariesStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -56,7 +53,7 @@ impl OutboxMessageConsumptionRepository for SqliteOutboxMessageConsumptionReposi while let Some(consumption) = query_stream.try_next().await? { yield Ok(consumption); } - })) + }) } async fn find_consumption_boundary( diff --git a/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_repository.rs b/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_repository.rs index 35c18c91f..cf860092a 100644 --- a/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_repository.rs +++ b/src/infra/messaging-outbox/sqlite/src/repos/sqlite_outbox_message_repository.rs @@ -55,17 +55,16 @@ impl OutboxMessageRepository for SqliteOutboxMessageRepository { Ok(()) } - async fn get_producer_messages( + fn get_producer_messages( &self, producer_name: &str, above_id: OutboxMessageID, batch_size: usize, - ) -> Result { - let mut tr = self.transaction.lock().await; - + ) -> OutboxMessageStream { let producer_name = producer_name.to_string(); - Ok(Box::pin(async_stream::stream! { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -96,7 +95,7 @@ impl OutboxMessageRepository for SqliteOutboxMessageRepository { while let Some(message) = query_stream.try_next().await? 
{ yield Ok(message); } - })) + }) } async fn get_latest_message_ids_by_producer( @@ -109,7 +108,7 @@ impl OutboxMessageRepository for SqliteOutboxMessageRepository { r#" SELECT producer_name, - IFNULL(MAX(message_id), 0) as max_message_id + IFNULL(MAX(message_id), 0) AS max_message_id FROM outbox_messages GROUP BY producer_name "#, diff --git a/src/infra/task-system/inmem/Cargo.toml b/src/infra/task-system/inmem/Cargo.toml index 1c7767a96..eb040f170 100644 --- a/src/infra/task-system/inmem/Cargo.toml +++ b/src/infra/task-system/inmem/Cargo.toml @@ -22,6 +22,7 @@ doctest = false [dependencies] +database-common = { workspace = true } opendatafabric = { workspace = true } kamu-task-system = { workspace = true } diff --git a/src/infra/task-system/inmem/src/inmem_task_system_event_store.rs b/src/infra/task-system/inmem/src/inmem_task_event_store.rs similarity index 60% rename from src/infra/task-system/inmem/src/inmem_task_system_event_store.rs rename to src/infra/task-system/inmem/src/inmem_task_event_store.rs index 7fe110e54..628407909 100644 --- a/src/infra/task-system/inmem/src/inmem_task_system_event_store.rs +++ b/src/infra/task-system/inmem/src/inmem_task_event_store.rs @@ -8,14 +8,16 @@ // by the Apache License, Version 2.0. use std::collections::hash_map::{Entry, HashMap}; +use std::collections::BTreeMap; +use database_common::PaginationOpts; use dill::*; use kamu_task_system::*; use opendatafabric::DatasetID; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct InMemoryTaskSystemEventStore { +pub struct InMemoryTaskEventStore { inner: InMemoryEventStore, } @@ -25,13 +27,14 @@ pub struct InMemoryTaskSystemEventStore { struct State { events: Vec, tasks_by_dataset: HashMap>, + task_statuses: BTreeMap, last_task_id: Option, } impl State { fn next_task_id(&mut self) -> TaskID { let new_task_id = if let Some(last_task_id) = self.last_task_id { - let id: i64 = last_task_id.into(); + let id: u64 = last_task_id.into(); TaskID::new(id + 1) } else { TaskID::new(0) @@ -58,41 +61,42 @@ impl EventStoreState for State { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[component(pub)] -#[interface(dyn TaskSystemEventStore)] +#[interface(dyn TaskEventStore)] #[scope(Singleton)] -impl InMemoryTaskSystemEventStore { +impl InMemoryTaskEventStore { pub fn new() -> Self { Self { inner: InMemoryEventStore::new(), } } - fn update_index_by_dataset( - tasks_by_dataset: &mut HashMap>, - event: &TaskEvent, - ) { + fn update_index(state: &mut State, event: &TaskEvent) { if let TaskEvent::TaskCreated(e) = &event { if let Some(dataset_id) = e.logical_plan.dataset_id() { - let entries = match tasks_by_dataset.entry(dataset_id.clone()) { + let entries = match state.tasks_by_dataset.entry(dataset_id.clone()) { Entry::Occupied(v) => v.into_mut(), Entry::Vacant(v) => v.insert(Vec::default()), }; entries.push(event.task_id()); } } + + state + .task_statuses + .insert(event.task_id(), event.new_status()); } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl EventStore for InMemoryTaskSystemEventStore { +impl EventStore for InMemoryTaskEventStore { async fn len(&self) -> Result { self.inner.len().await } - async fn get_events(&self, task_id: &TaskID, opts: GetEventsOpts) -> EventStream { - self.inner.get_events(task_id, opts).await + fn get_events(&self, task_id: 
&TaskID, opts: GetEventsOpts) -> EventStream { + self.inner.get_events(task_id, opts) } async fn save_events( @@ -108,7 +112,7 @@ impl EventStore for InMemoryTaskSystemEventStore { let state = self.inner.as_state(); let mut g = state.lock().unwrap(); for event in &events { - Self::update_index_by_dataset(&mut g.tasks_by_dataset, event); + Self::update_index(&mut g, event); } } @@ -119,15 +123,64 @@ impl EventStore for InMemoryTaskSystemEventStore { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl TaskSystemEventStore for InMemoryTaskSystemEventStore { +impl TaskEventStore for InMemoryTaskEventStore { + /// Generates new unique task identifier async fn new_task_id(&self) -> Result { Ok(self.inner.as_state().lock().unwrap().next_task_id()) } - async fn get_tasks_by_dataset( + /// Attempts to get the earliest queued task, if any + async fn try_get_queued_task(&self) -> Result, InternalError> { + let state = self.inner.as_state(); + let g = state.lock().unwrap(); + let maybe_task_id = g + .task_statuses + .iter() + .find(|(_, status)| **status == TaskStatus::Queued) + .map(|(id, _)| *id); + Ok(maybe_task_id) + } + + /// Returns list of tasks, which are in Running state, + /// from earliest to latest + fn get_running_tasks(&self, pagination: PaginationOpts) -> TaskIDStream { + let task_ids_page: Vec<_> = { + let state = self.inner.as_state(); + let g = state.lock().unwrap(); + g.task_statuses + .iter() + .filter(|(_, status)| **status == TaskStatus::Running) + .skip(pagination.offset) + .take(pagination.limit) + .map(|(id, _)| Ok(*id)) + .collect() + }; + + Box::pin(futures::stream::iter(task_ids_page)) + } + + /// Returns total number of tasks, which are in Running state + async fn get_count_running_tasks(&self) -> Result { + let state = self.inner.as_state(); + let g = state.lock().unwrap(); + let mut count = 0; + + for task_status in g.task_statuses.values() { + if *task_status == TaskStatus::Running { + count += 1; + } + } + + Ok(count) + } + + /// Returns page of the tasks associated with the specified dataset in + /// reverse chronological order based on creation time + /// Note: no longer used, but might be used in future (admin view) + fn get_tasks_by_dataset( &self, dataset_id: &DatasetID, - pagination: TaskPaginationOpts, + pagination: PaginationOpts, ) -> TaskIDStream { let task_ids_page: Option> = { let state = self.inner.as_state(); @@ -149,6 +202,8 @@ impl TaskSystemEventStore for InMemoryTaskSystemEventStore { } } + /// Returns total number of tasks associated with the specified dataset + /// Note: no longer used, but might be used in future (admin view) async fn get_count_tasks_by_dataset( &self, dataset_id: &DatasetID, diff --git a/src/infra/task-system/inmem/src/lib.rs b/src/infra/task-system/inmem/src/lib.rs index a191d8653..0b82ab6fa 100644 --- a/src/infra/task-system/inmem/src/lib.rs +++ b/src/infra/task-system/inmem/src/lib.rs @@ -14,6 +14,6 @@ // Re-exports pub use kamu_task_system as domain; -mod inmem_task_system_event_store; +mod inmem_task_event_store; -pub use inmem_task_system_event_store::*; +pub use inmem_task_event_store::*; diff --git a/src/infra/task-system/inmem/tests/tests/test_inmem_task_system_event_store.rs b/src/infra/task-system/inmem/tests/tests/test_inmem_task_system_event_store.rs index c6cb662b8..4f3e8ac1d 100644 --- a/src/infra/task-system/inmem/tests/tests/test_inmem_task_system_event_store.rs +++ 
b/src/infra/task-system/inmem/tests/tests/test_inmem_task_system_event_store.rs @@ -9,7 +9,7 @@ use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; -use kamu_task_system_inmem::*; +use kamu_task_system_inmem::InMemoryTaskEventStore; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -53,6 +53,30 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = inmem, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_single_task, + harness = InMemoryTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_multiple_tasks, + harness = InMemoryTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = kamu_task_system_repo_tests::test_event_store_get_running_tasks, + harness = InMemoryTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct InMemoryTaskSystemEventStoreHarness { catalog: Catalog, } @@ -60,7 +84,7 @@ struct InMemoryTaskSystemEventStoreHarness { impl InMemoryTaskSystemEventStoreHarness { pub fn new() -> Self { let mut catalog_builder = CatalogBuilder::new(); - catalog_builder.add::(); + catalog_builder.add::(); Self { catalog: catalog_builder.build(), diff --git a/src/infra/task-system/postgres/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json b/src/infra/task-system/postgres/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json new file mode 100644 index 000000000..a29e17b58 --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(event_id) AS events_count from task_events\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "events_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-0ffd4d6230e7b92742b0cdae411b1032af0cd76e3ae2c89a6f283aaadc24edf3.json b/src/infra/task-system/postgres/.sqlx/query-0ffd4d6230e7b92742b0cdae411b1032af0cd76e3ae2c89a6f283aaadc24edf3.json new file mode 100644 index 000000000..11d19753e --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-0ffd4d6230e7b92742b0cdae411b1032af0cd76e3ae2c89a6f283aaadc24edf3.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(task_id) AS tasks_count FROM tasks\n WHERE task_status = 'running'::task_status_type\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "tasks_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "0ffd4d6230e7b92742b0cdae411b1032af0cd76e3ae2c89a6f283aaadc24edf3" +} diff --git 
a/src/infra/task-system/postgres/.sqlx/query-1110cf814e0238dc1d5c2c87893dded51e80eb634bd35c0e96203a3b5f668b82.json b/src/infra/task-system/postgres/.sqlx/query-1110cf814e0238dc1d5c2c87893dded51e80eb634bd35c0e96203a3b5f668b82.json new file mode 100644 index 000000000..42a20ef40 --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-1110cf814e0238dc1d5c2c87893dded51e80eb634bd35c0e96203a3b5f668b82.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT task_id FROM tasks\n WHERE task_status = 'running'::task_status_type\n ORDER BY task_id ASC\n LIMIT $1 OFFSET $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "task_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "1110cf814e0238dc1d5c2c87893dded51e80eb634bd35c0e96203a3b5f668b82" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-8addf79d53a4f753db9ae775c771f3fc960fc723ccc9cff6c7aa55e5f51d3e69.json b/src/infra/task-system/postgres/.sqlx/query-26cf41184fd27dca94603b4c2d4e1563d3675de58f912c96ba5bf1b4c78f0bd1.json similarity index 66% rename from src/infra/task-system/postgres/.sqlx/query-8addf79d53a4f753db9ae775c771f3fc960fc723ccc9cff6c7aa55e5f51d3e69.json rename to src/infra/task-system/postgres/.sqlx/query-26cf41184fd27dca94603b4c2d4e1563d3675de58f912c96ba5bf1b4c78f0bd1.json index a95044c22..3760420a3 100644 --- a/src/infra/task-system/postgres/.sqlx/query-8addf79d53a4f753db9ae775c771f3fc960fc723ccc9cff6c7aa55e5f51d3e69.json +++ b/src/infra/task-system/postgres/.sqlx/query-26cf41184fd27dca94603b4c2d4e1563d3675de58f912c96ba5bf1b4c78f0bd1.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT nextval('task_id_seq') as new_task_id\n ", + "query": "\n SELECT nextval('task_id_seq') AS new_task_id\n ", "describe": { "columns": [ { @@ -16,5 +16,5 @@ null ] }, - "hash": "8addf79d53a4f753db9ae775c771f3fc960fc723ccc9cff6c7aa55e5f51d3e69" + "hash": "26cf41184fd27dca94603b4c2d4e1563d3675de58f912c96ba5bf1b4c78f0bd1" } diff --git a/src/infra/task-system/postgres/.sqlx/query-52a6f2739f4714fcf26e8a738268a0d4a4926b5fd31d450bca4c509db48c7101.json b/src/infra/task-system/postgres/.sqlx/query-52a6f2739f4714fcf26e8a738268a0d4a4926b5fd31d450bca4c509db48c7101.json new file mode 100644 index 000000000..1d725643f --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-52a6f2739f4714fcf26e8a738268a0d4a4926b5fd31d450bca4c509db48c7101.json @@ -0,0 +1,22 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(task_id) AS tasks_count FROM tasks\n WHERE dataset_id = $1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "tasks_count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [ + null + ] + }, + "hash": "52a6f2739f4714fcf26e8a738268a0d4a4926b5fd31d450bca4c509db48c7101" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json b/src/infra/task-system/postgres/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json new file mode 100644 index 000000000..3515125e6 --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "\n UPDATE tasks\n SET task_status = $2\n WHERE task_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + { + "Custom": { + "name": "task_status_type", + "kind": { + 
"Enum": [ + "queued", + "running", + "finished" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json b/src/infra/task-system/postgres/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json new file mode 100644 index 000000000..2b5413463 --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json @@ -0,0 +1,27 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO tasks (task_id, dataset_id, task_status)\n VALUES ($1, $2, $3)\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + "Varchar", + { + "Custom": { + "name": "task_status_type", + "kind": { + "Enum": [ + "queued", + "running", + "finished" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json b/src/infra/task-system/postgres/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json deleted file mode 100644 index b407e24be..000000000 --- a/src/infra/task-system/postgres/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT task_id\n FROM task_events\n WHERE dataset_id = $1 AND event_type = 'TaskEventCreated'\n ORDER BY task_id DESC LIMIT $2 OFFSET $3\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "task_id", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [ - "Text", - "Int8", - "Int8" - ] - }, - "nullable": [ - false - ] - }, - "hash": "ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037" -} diff --git a/src/infra/task-system/postgres/.sqlx/query-cfc07091c0663a9d91b7c11ac0ef332628b349de81bbd687bc915d7d5b697bfc.json b/src/infra/task-system/postgres/.sqlx/query-cfc07091c0663a9d91b7c11ac0ef332628b349de81bbd687bc915d7d5b697bfc.json new file mode 100644 index 000000000..3c9e24e7f --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-cfc07091c0663a9d91b7c11ac0ef332628b349de81bbd687bc915d7d5b697bfc.json @@ -0,0 +1,24 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT task_id FROM tasks\n WHERE dataset_id = $1\n ORDER BY task_id DESC\n LIMIT $2 OFFSET $3\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "task_id", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text", + "Int8", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "cfc07091c0663a9d91b7c11ac0ef332628b349de81bbd687bc915d7d5b697bfc" +} diff --git a/src/infra/task-system/postgres/.sqlx/query-f9f32dedeff3aaff8c2a61130d3a652f050de51e90954fda997c5139a7505534.json b/src/infra/task-system/postgres/.sqlx/query-f9f32dedeff3aaff8c2a61130d3a652f050de51e90954fda997c5139a7505534.json new file mode 100644 index 000000000..bc67269e3 --- /dev/null +++ b/src/infra/task-system/postgres/.sqlx/query-f9f32dedeff3aaff8c2a61130d3a652f050de51e90954fda997c5139a7505534.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT task_id FROM tasks\n WHERE task_status = 'queued'::task_status_type\n ORDER BY task_id ASC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "task_id", + "type_info": "Int8" + } + ], + 
"parameters": { + "Left": [] + }, + "nullable": [ + false + ] + }, + "hash": "f9f32dedeff3aaff8c2a61130d3a652f050de51e90954fda997c5139a7505534" +} diff --git a/src/infra/task-system/postgres/src/lib.rs b/src/infra/task-system/postgres/src/lib.rs index 5f5ce876e..dc45f3498 100644 --- a/src/infra/task-system/postgres/src/lib.rs +++ b/src/infra/task-system/postgres/src/lib.rs @@ -10,6 +10,6 @@ // Re-exports pub use kamu_task_system as domain; -mod postgres_task_system_event_store; +mod postgres_task_event_store; -pub use postgres_task_system_event_store::*; +pub use postgres_task_event_store::*; diff --git a/src/infra/task-system/postgres/src/postgres_task_event_store.rs b/src/infra/task-system/postgres/src/postgres_task_event_store.rs new file mode 100644 index 000000000..d25149402 --- /dev/null +++ b/src/infra/task-system/postgres/src/postgres_task_event_store.rs @@ -0,0 +1,356 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; +use dill::*; +use futures::TryStreamExt; +use kamu_task_system::*; +use opendatafabric::DatasetID; +use sqlx::{FromRow, Postgres, QueryBuilder}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct PostgresTaskEventStore { + transaction: TransactionRefT, +} + +#[component(pub)] +#[interface(dyn TaskEventStore)] +impl PostgresTaskEventStore { + pub fn new(transaction: TransactionRef) -> Self { + Self { + transaction: transaction.into(), + } + } + + async fn save_task_updates_from_events( + &self, + tr: &mut database_common::TransactionGuard<'_, Postgres>, + events: &[TaskEvent], + ) -> Result<(), SaveEventsError> { + for event in events { + let connection_mut = tr.connection_mut().await?; + + let event_task_id: i64 = (event.task_id()).try_into().unwrap(); + + if let TaskEvent::TaskCreated(e) = &event { + let maybe_dataset_id = e.logical_plan.dataset_id(); + sqlx::query!( + r#" + INSERT INTO tasks (task_id, dataset_id, task_status) + VALUES ($1, $2, $3) + "#, + event_task_id, + maybe_dataset_id.map(ToString::to_string), + TaskStatus::Queued as TaskStatus, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + /* Existing task, update status */ + else { + let new_status = event.new_status(); + + sqlx::query!( + r#" + UPDATE tasks + SET task_status = $2 + WHERE task_id = $1 + "#, + event_task_id, + new_status as TaskStatus, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + + Ok(()) + } + + async fn save_events_impl( + &self, + tr: &mut database_common::TransactionGuard<'_, Postgres>, + events: &[TaskEvent], + ) -> Result { + let connection_mut = tr.connection_mut().await?; + + #[derive(FromRow)] + struct ResultRow { + event_id: i64, + } + + let mut query_builder = QueryBuilder::::new( + r#" + INSERT INTO task_events (task_id, event_time, event_type, event_payload) + "#, + ); + + query_builder.push_values(events, |mut b, event| { + let event_task_id: i64 = (event.task_id()).try_into().unwrap(); + b.push_bind(event_task_id); + b.push_bind(event.event_time()); + b.push_bind(event.typename()); + 
b.push_bind(serde_json::to_value(event).unwrap()); + }); + + query_builder.push("RETURNING event_id"); + + let rows = query_builder + .build_query_as::() + .fetch_all(connection_mut) + .await + .int_err()?; + + let last_event_id = rows.last().unwrap().event_id; + Ok(EventID::new(last_event_id)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl EventStore for PostgresTaskEventStore { + fn get_events(&self, task_id: &TaskID, opts: GetEventsOpts) -> EventStream { + let task_id: i64 = (*task_id).try_into().unwrap(); + let maybe_from_id = opts.from.map(EventID::into_inner); + let maybe_to_id = opts.to.map(EventID::into_inner); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr + .connection_mut() + .await?; + + let mut query_stream = sqlx::query!( + r#" + SELECT event_id, event_payload FROM task_events + WHERE task_id = $1 + AND (cast($2 as INT8) IS NULL or event_id > $2) + AND (cast($3 as INT8) IS NULL or event_id <= $3) + "#, + task_id, + maybe_from_id, + maybe_to_id, + ).try_map(|event_row| { + let event = match serde_json::from_value::(event_row.event_payload) { + Ok(event) => event, + Err(e) => return Err(sqlx::Error::Decode(Box::new(e))), + }; + Ok((EventID::new(event_row.event_id), event)) + }) + .fetch(connection_mut) + .map_err(|e| GetEventsError::Internal(e.int_err())); + + while let Some((event_id, event)) = query_stream.try_next().await? { + yield Ok((event_id, event)); + } + }) + } + + async fn save_events( + &self, + _task_id: &TaskID, + events: Vec, + ) -> Result { + if events.is_empty() { + return Err(SaveEventsError::NothingToSave); + } + + let mut tr = self.transaction.lock().await; + + self.save_task_updates_from_events(&mut tr, &events).await?; + let last_event_id = self.save_events_impl(&mut tr, &events).await?; + + Ok(last_event_id) + } + + async fn len(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(event_id) AS events_count from task_events + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.events_count.unwrap()).int_err()?; + Ok(count) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl TaskEventStore for PostgresTaskEventStore { + /// Generates new unique task identifier + async fn new_task_id(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT nextval('task_id_seq') AS new_task_id + "# + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let task_id = result.new_task_id.unwrap(); + Ok(TaskID::try_from(task_id).unwrap()) + } + + /// Attempts to get the earliest queued task, if any + async fn try_get_queued_task(&self) -> Result, InternalError> { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_task_id = sqlx::query!( + r#" + SELECT task_id FROM tasks + WHERE task_status = 'queued'::task_status_type + ORDER BY task_id ASC + LIMIT 1 + "#, + ) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) + .fetch_optional(connection_mut) + .await + .map_err(ErrorIntoInternal::int_err)?; + + 
Ok(maybe_task_id) + } + + /// Returns list of tasks, which are in Running state, + /// from earliest to latest + fn get_running_tasks(&self, pagination: PaginationOpts) -> TaskIDStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query!( + r#" + SELECT task_id FROM tasks + WHERE task_status = 'running'::task_status_type + ORDER BY task_id ASC + LIMIT $1 OFFSET $2 + "#, + limit, + offset, + ) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + while let Some(task_id) = query_stream.try_next().await? { + yield Ok(task_id); + } + }) + } + + /// Returns total number of tasks, which are in Running state + async fn get_count_running_tasks(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(task_id) AS tasks_count FROM tasks + WHERE task_status = 'running'::task_status_type + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.tasks_count.unwrap()).int_err()?; + Ok(count) + } + + /// Returns page of the tasks associated with the specified dataset in + /// reverse chronological order based on creation time + fn get_tasks_by_dataset( + &self, + dataset_id: &DatasetID, + pagination: PaginationOpts, + ) -> TaskIDStream { + let dataset_id = dataset_id.to_string(); + + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query!( + r#" + SELECT task_id FROM tasks + WHERE dataset_id = $1 + ORDER BY task_id DESC + LIMIT $2 OFFSET $3 + "#, + dataset_id, + limit, + offset, + ) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + while let Some(task_id) = query_stream.try_next().await? { + yield Ok(task_id); + } + }) + } + + /// Returns total number of tasks associated with the specified dataset + async fn get_count_tasks_by_dataset( + &self, + dataset_id: &DatasetID, + ) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(task_id) AS tasks_count FROM tasks + WHERE dataset_id = $1 + "#, + dataset_id.to_string() + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.tasks_count.unwrap()).int_err()?; + Ok(count) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/task-system/postgres/src/postgres_task_system_event_store.rs b/src/infra/task-system/postgres/src/postgres_task_system_event_store.rs deleted file mode 100644 index fe6ec65ed..000000000 --- a/src/infra/task-system/postgres/src/postgres_task_system_event_store.rs +++ /dev/null @@ -1,219 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
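
As an aside on how the queue-related queries introduced above are meant to be consumed: the sketch below is illustrative only and is not part of this changeset. It assumes nothing beyond the `TaskEventStore` methods shown here (`try_get_queued_task`, `get_running_tasks(PaginationOpts)`, `get_count_running_tasks`), resolved as a trait object the same way the test harnesses resolve it; the helper function names are hypothetical.

```rust
use std::sync::Arc;

use database_common::PaginationOpts;
use futures::TryStreamExt;
use kamu_task_system::*;

/// Hypothetical recovery helper: after a restart, page through tasks that were
/// left in the Running state so an executor could requeue or resume them.
async fn list_interrupted_tasks(
    event_store: Arc<dyn TaskEventStore>,
) -> Result<Vec<TaskID>, InternalError> {
    let total = event_store.get_count_running_tasks().await?;
    let mut interrupted = Vec::with_capacity(total);

    let mut offset = 0;
    while offset < total {
        // `get_running_tasks` yields task IDs from earliest to latest;
        // collect one page at a time.
        let page: Vec<TaskID> = event_store
            .get_running_tasks(PaginationOpts { limit: 100, offset })
            .try_collect()
            .await?;
        if page.is_empty() {
            break;
        }
        offset += page.len();
        interrupted.extend(page);
    }

    Ok(interrupted)
}

/// Hypothetical polling helper: fetch the earliest queued task, if any.
async fn next_queued_task(
    event_store: &dyn TaskEventStore,
) -> Result<Option<TaskID>, InternalError> {
    event_store.try_get_queued_task().await
}
```

The paging pattern mirrors what the repository test suite further below exercises via `PaginationOpts`.
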
-// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use database_common::{TransactionRef, TransactionRefT}; -use dill::*; -use futures::TryStreamExt; -use kamu_task_system::*; -use opendatafabric::DatasetID; -use sqlx::{FromRow, QueryBuilder}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct PostgresTaskSystemEventStore { - transaction: TransactionRefT, -} - -#[component(pub)] -#[interface(dyn TaskSystemEventStore)] -impl PostgresTaskSystemEventStore { - pub fn new(transaction: TransactionRef) -> Self { - Self { - transaction: transaction.into(), - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl EventStore for PostgresTaskSystemEventStore { - async fn get_events(&self, task_id: &TaskID, opts: GetEventsOpts) -> EventStream { - let mut tr = self.transaction.lock().await; - - let task_id: i64 = (*task_id).into(); - let maybe_from_id = opts.from.map(EventID::into_inner); - let maybe_to_id = opts.to.map(EventID::into_inner); - - Box::pin(async_stream::stream! { - let connection_mut = tr - .connection_mut() - .await?; - - let mut query_stream = sqlx::query!( - r#" - SELECT event_id, event_payload FROM task_events - WHERE task_id = $1 - AND (cast($2 as INT8) IS NULL or event_id > $2) - AND (cast($3 as INT8) IS NULL or event_id <= $3) - "#, - task_id, - maybe_from_id, - maybe_to_id, - ).try_map(|event_row| { - let event = match serde_json::from_value::(event_row.event_payload) { - Ok(event) => event, - Err(e) => return Err(sqlx::Error::Decode(Box::new(e))), - }; - Ok((EventID::new(event_row.event_id), event)) - }) - .fetch(connection_mut) - .map_err(|e| GetEventsError::Internal(e.int_err())); - - while let Some((event_id, event)) = query_stream.try_next().await? 
{ - yield Ok((event_id, event)); - } - }) - } - - async fn save_events( - &self, - _task_id: &TaskID, - events: Vec, - ) -> Result { - if events.is_empty() { - return Err(SaveEventsError::NothingToSave); - } - - let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; - - #[derive(FromRow)] - struct ResultRow { - event_id: i64, - } - - let mut query_builder = QueryBuilder::::new( - r#" - INSERT INTO task_events (task_id, dataset_id, event_time, event_type, event_payload) - "#, - ); - - query_builder.push_values(events, |mut b, event| { - let event_task_id: i64 = (event.task_id()).into(); - b.push_bind(event_task_id); - b.push_bind(event.dataset_id().map(ToString::to_string)); - b.push_bind(event.event_time()); - b.push_bind(event.typename()); - b.push_bind(serde_json::to_value(event).unwrap()); - }); - - query_builder.push("RETURNING event_id"); - - let rows = query_builder - .build_query_as::() - .fetch_all(connection_mut) - .await - .int_err()?; - let last_event_id = rows.last().unwrap().event_id; - - Ok(EventID::new(last_event_id)) - } - - async fn len(&self) -> Result { - let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; - - let result = sqlx::query!( - r#" - SELECT COUNT(event_id) from task_events - "#, - ) - .fetch_one(connection_mut) - .await - .int_err()?; - - let count = usize::try_from(result.count.unwrap()).int_err()?; - Ok(count) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl TaskSystemEventStore for PostgresTaskSystemEventStore { - /// Generates new unique task identifier - async fn new_task_id(&self) -> Result { - let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; - - let result = sqlx::query!( - r#" - SELECT nextval('task_id_seq') as new_task_id - "# - ) - .fetch_one(connection_mut) - .await - .int_err()?; - - Ok(TaskID::new(result.new_task_id.unwrap())) - } - - /// Returns page of the tasks associated with the specified dataset in - /// reverse chronological order based on creation time - async fn get_tasks_by_dataset( - &self, - dataset_id: &DatasetID, - pagination: TaskPaginationOpts, - ) -> TaskIDStream { - let mut tr = self.transaction.lock().await; - let dataset_id = dataset_id.to_string(); - - Box::pin(async_stream::stream! { - let connection_mut = tr.connection_mut().await?; - - let limit = i64::try_from(pagination.limit).int_err()?; - let offset = i64::try_from(pagination.offset).int_err()?; - - let mut query_stream = sqlx::query!( - r#" - SELECT task_id - FROM task_events - WHERE dataset_id = $1 AND event_type = 'TaskEventCreated' - ORDER BY task_id DESC LIMIT $2 OFFSET $3 - "#, - dataset_id, - limit, - offset, - ) - .try_map(|event_row| Ok(TaskID::new(event_row.task_id))) - .fetch(connection_mut) - .map_err(ErrorIntoInternal::int_err); - - while let Some(task_id) = query_stream.try_next().await? 
{ - yield Ok(task_id); - } - }) - } - - /// Returns total number of tasks associated with the specified dataset - async fn get_count_tasks_by_dataset( - &self, - dataset_id: &DatasetID, - ) -> Result { - let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; - - let result = sqlx::query!( - r#" - SELECT COUNT(event_id) FROM task_events - WHERE dataset_id = $1 - "#, - dataset_id.to_string() - ) - .fetch_one(connection_mut) - .await - .int_err()?; - - let count = usize::try_from(result.count.unwrap()).int_err()?; - Ok(count) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/task-system/postgres/tests/tests/test_postgres_task_system_event_store.rs b/src/infra/task-system/postgres/tests/tests/test_postgres_task_system_event_store.rs index 4dde9c5ed..3044320f6 100644 --- a/src/infra/task-system/postgres/tests/tests/test_postgres_task_system_event_store.rs +++ b/src/infra/task-system/postgres/tests/tests/test_postgres_task_system_event_store.rs @@ -10,7 +10,7 @@ use database_common::PostgresTransactionManager; use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; -use kamu_task_system_postgres::PostgresTaskSystemEventStore; +use kamu_task_system_postgres::PostgresTaskEventStore; use sqlx::PgPool; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -55,6 +55,30 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = postgres, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_single_task, + harness = PostgresTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_multiple_tasks, + harness = PostgresTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = kamu_task_system_repo_tests::test_event_store_get_running_tasks, + harness = PostgresTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct PostgresTaskSystemEventStoreHarness { catalog: Catalog, } @@ -65,7 +89,7 @@ impl PostgresTaskSystemEventStoreHarness { let mut catalog_builder = CatalogBuilder::new(); catalog_builder.add_value(pg_pool); catalog_builder.add::(); - catalog_builder.add::(); + catalog_builder.add::(); Self { catalog: catalog_builder.build(), diff --git a/src/infra/task-system/repo-tests/Cargo.toml b/src/infra/task-system/repo-tests/Cargo.toml index 723515c29..205d49019 100644 --- a/src/infra/task-system/repo-tests/Cargo.toml +++ b/src/infra/task-system/repo-tests/Cargo.toml @@ -22,6 +22,7 @@ doctest = false [dependencies] +database-common = { workspace = true } kamu-task-system = { workspace = true } opendatafabric = { workspace = true } diff --git a/src/infra/task-system/repo-tests/src/task_system_repository_test_suite.rs b/src/infra/task-system/repo-tests/src/task_system_repository_test_suite.rs index 93448d6a5..3762a31e9 
100644 --- a/src/infra/task-system/repo-tests/src/task_system_repository_test_suite.rs +++ b/src/infra/task-system/repo-tests/src/task_system_repository_test_suite.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. use chrono::Utc; +use database_common::PaginationOpts; use dill::Catalog; use futures::TryStreamExt; use kamu_task_system::*; @@ -16,14 +17,13 @@ use opendatafabric::DatasetID; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_event_store_empty(catalog: &Catalog) { - let event_store = catalog.get_one::().unwrap(); + let event_store = catalog.get_one::().unwrap(); let num_events = event_store.len().await.unwrap(); assert_eq!(0, num_events); let events: Vec<_> = event_store .get_events(&TaskID::new(123), GetEventsOpts::default()) - .await .try_collect() .await .unwrap(); @@ -33,12 +33,11 @@ pub async fn test_event_store_empty(catalog: &Catalog) { let tasks: Vec<_> = event_store .get_tasks_by_dataset( &DatasetID::new_seeded_ed25519(b"foo"), - TaskPaginationOpts { + PaginationOpts { limit: 100, offset: 0, }, ) - .await .try_collect() .await .unwrap(); @@ -49,10 +48,10 @@ pub async fn test_event_store_empty(catalog: &Catalog) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_event_store_get_streams(catalog: &Catalog) { - let event_store = catalog.get_one::().unwrap(); + let event_store = catalog.get_one::().unwrap(); - let task_id_1 = TaskID::new(123); - let task_id_2 = TaskID::new(321); + let task_id_1 = event_store.new_task_id().await.unwrap(); + let task_id_2 = event_store.new_task_id().await.unwrap(); let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); let event_1 = TaskEventCreated { @@ -63,6 +62,7 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_2 = TaskEventCreated { @@ -73,6 +73,7 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_3 = TaskEventFinished { @@ -98,7 +99,6 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&task_id_1, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -109,12 +109,11 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { let tasks: Vec<_> = event_store .get_tasks_by_dataset( &dataset_id, - TaskPaginationOpts { + PaginationOpts { limit: 100, offset: 0, }, ) - .await .try_collect() .await .unwrap(); @@ -126,7 +125,7 @@ pub async fn test_event_store_get_streams(catalog: &Catalog) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { - let event_store = catalog.get_one::().unwrap(); + let event_store = catalog.get_one::().unwrap(); let task_id = event_store.new_task_id().await.unwrap(); let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); @@ -139,6 +138,7 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_2 = TaskEventRunning { @@ -175,7 +175,6 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { to: None, }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -194,7 +193,6 @@ pub async fn 
test_event_store_get_events_with_windowing(catalog: &Catalog) { )), }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -212,7 +210,6 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { to: Some(EventID::new(latest_event_id.into_inner() - 1)), }, ) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -224,7 +221,7 @@ pub async fn test_event_store_get_events_with_windowing(catalog: &Catalog) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { - let event_store = catalog.get_one::().unwrap(); + let event_store = catalog.get_one::().unwrap(); let task_id_1 = event_store.new_task_id().await.unwrap(); let task_id_2 = event_store.new_task_id().await.unwrap(); @@ -238,6 +235,7 @@ pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_2_1 = TaskEventCreated { @@ -248,6 +246,7 @@ pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_1_2 = TaskEventRunning { @@ -304,7 +303,6 @@ pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&task_id_1, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -317,7 +315,6 @@ pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { let events: Vec<_> = event_store .get_events(&task_id_2, GetEventsOpts::default()) - .await .map_ok(|(_, event)| event) .try_collect() .await @@ -332,7 +329,7 @@ pub async fn test_event_store_get_events_by_tasks(catalog: &Catalog) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { - let event_store = catalog.get_one::().unwrap(); + let event_store = catalog.get_one::().unwrap(); let task_id_1_1 = event_store.new_task_id().await.unwrap(); let task_id_2_1 = event_store.new_task_id().await.unwrap(); @@ -350,6 +347,7 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_1_2 = TaskEventCreated { @@ -360,6 +358,7 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_2_1 = TaskEventCreated { @@ -370,6 +369,7 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; let event_2_2 = TaskEventCreated { @@ -380,6 +380,7 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { ..Probe::default() } .into(), + metadata: None, }; event_store @@ -429,12 +430,11 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { let task_ids: Vec<_> = event_store .get_tasks_by_dataset( &dataset_id_foo, - TaskPaginationOpts { + PaginationOpts { limit: 5, offset: 0, }, ) - .await .try_collect() .await .unwrap(); @@ -444,12 +444,11 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { let task_ids: Vec<_> = event_store .get_tasks_by_dataset( &dataset_id_bar, - TaskPaginationOpts { + PaginationOpts { limit: 5, offset: 0, }, ) - .await .try_collect() .await .unwrap(); @@ -459,12 +458,11 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { let task_ids: Vec<_> = event_store .get_tasks_by_dataset( 
&dataset_id_foo, - TaskPaginationOpts { + PaginationOpts { limit: 1, offset: 0, }, ) - .await .try_collect() .await .unwrap(); @@ -474,12 +472,11 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { let task_ids: Vec<_> = event_store .get_tasks_by_dataset( &dataset_id_foo, - TaskPaginationOpts { + PaginationOpts { limit: 1, offset: 1, }, ) - .await .try_collect() .await .unwrap(); @@ -489,12 +486,11 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { let task_ids: Vec<_> = event_store .get_tasks_by_dataset( &dataset_id_foo, - TaskPaginationOpts { + PaginationOpts { limit: 1, offset: 2, }, ) - .await .try_collect() .await .unwrap(); @@ -503,3 +499,409 @@ pub async fn test_event_store_get_dataset_tasks(catalog: &Catalog) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_event_store_try_get_queued_single_task(catalog: &Catalog) { + let event_store = catalog.get_one::().unwrap(); + + // Initially, there is nothing to get + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert!(maybe_task_id.is_none()); + + // Schedule a task + let task_id_1 = event_store.new_task_id().await.unwrap(); + event_store + .save_events( + &task_id_1, + vec![TaskEventCreated { + event_time: Utc::now(), + task_id: task_id_1, + logical_plan: Probe::default().into(), + metadata: None, + } + .into()], + ) + .await + .unwrap(); + + // The only queued task should be returned + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_id_1)); + + // Mark the task as running + event_store + .save_events( + &task_id_1, + vec![TaskEventRunning { + event_time: Utc::now(), + task_id: task_id_1, + } + .into()], + ) + .await + .unwrap(); + + // Right now nothing should be visible + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert!(maybe_task_id.is_none()); + + // Requeue the task (server restarted) + event_store + .save_events( + &task_id_1, + vec![TaskEventRequeued { + event_time: Utc::now(), + task_id: task_id_1, + } + .into()], + ) + .await + .unwrap(); + + // The task should be visible again + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_id_1)); + + // Now run and finish the task + event_store + .save_events( + &task_id_1, + vec![ + TaskEventRunning { + event_time: Utc::now(), + task_id: task_id_1, + } + .into(), + TaskEventFinished { + event_time: Utc::now(), + task_id: task_id_1, + outcome: TaskOutcome::Success(TaskResult::Empty), + } + .into(), + ], + ) + .await + .unwrap(); + + // The task should disappear again + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert!(maybe_task_id.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_event_store_try_get_queued_multiple_tasks(catalog: &Catalog) { + let event_store = catalog.get_one::().unwrap(); + + // Schedule a few tasks + let mut task_ids: Vec<_> = Vec::new(); + for _ in 0..3 { + let task_id = event_store.new_task_id().await.unwrap(); + event_store + .save_events( + &task_id, + vec![TaskEventCreated { + event_time: Utc::now(), + task_id, + logical_plan: Probe::default().into(), + metadata: None, + } + .into()], + ) + .await + .unwrap(); + + task_ids.push(task_id); + } + + // We should see the earliest registered task + let maybe_task_id = 
event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_ids[0])); + + // Mark task 0 as running + event_store + .save_events( + &task_ids[0], + vec![TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[0], + } + .into()], + ) + .await + .unwrap(); + + // Now we should see the next registered task + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_ids[1])); + + // Mark task 1 as running, then finished + event_store + .save_events( + &task_ids[1], + vec![ + TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[1], + } + .into(), + TaskEventFinished { + event_time: Utc::now(), + task_id: task_ids[1], + outcome: TaskOutcome::Success(TaskResult::Empty), + } + .into(), + ], + ) + .await + .unwrap(); + + // Now we should see the last registered task + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_ids[2])); + + // Task 0 got requeued + event_store + .save_events( + &task_ids[0], + vec![TaskEventRequeued { + event_time: Utc::now(), + task_id: task_ids[0], + } + .into()], + ) + .await + .unwrap(); + + // This should bring task 0 back to the top of the queue + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_ids[0])); + + // Mark task 0 as running, then finished + event_store + .save_events( + &task_ids[0], + vec![ + TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[0], + } + .into(), + TaskEventFinished { + event_time: Utc::now(), + task_id: task_ids[0], + outcome: TaskOutcome::Success(TaskResult::Empty), + } + .into(), + ], + ) + .await + .unwrap(); + + // Task 2 should be the top again + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert_eq!(maybe_task_id, Some(task_ids[2])); + + // Mark task 2 as running + event_store + .save_events( + &task_ids[2], + vec![TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[2], + } + .into()], + ) + .await + .unwrap(); + + // We should see empty queue + let maybe_task_id = event_store.try_get_queued_task().await.unwrap(); + assert!(maybe_task_id.is_none()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_event_store_get_running_tasks(catalog: &Catalog) { + let event_store = catalog.get_one::().unwrap(); + + // No running tasks initially + + let running_count = event_store.get_count_running_tasks().await.unwrap(); + assert_eq!(running_count, 0); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert!(running_task_ids.is_empty()); + + // Schedule a few tasks + let mut task_ids: Vec<_> = Vec::new(); + for _ in 0..3 { + let task_id = event_store.new_task_id().await.unwrap(); + event_store + .save_events( + &task_id, + vec![TaskEventCreated { + event_time: Utc::now(), + task_id, + logical_plan: Probe::default().into(), + metadata: None, + } + .into()], + ) + .await + .unwrap(); + + task_ids.push(task_id); + } + + // Still no running tasks + + let running_count = event_store.get_count_running_tasks().await.unwrap(); + assert_eq!(running_count, 0); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert!(running_task_ids.is_empty()); + + // Mark 2 of 3 tasks as running + 
event_store + .save_events( + &task_ids[0], + vec![TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[0], + } + .into()], + ) + .await + .unwrap(); + event_store + .save_events( + &task_ids[1], + vec![TaskEventRunning { + event_time: Utc::now(), + task_id: task_ids[1], + } + .into()], + ) + .await + .unwrap(); + + // Should see 2 running tasks + + let running_count = event_store.get_count_running_tasks().await.unwrap(); + assert_eq!(running_count, 2); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert_eq!(running_task_ids, vec![task_ids[0], task_ids[1]]); + + // Query the same state with pagination args + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 1, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert_eq!(running_task_ids, vec![task_ids[0]]); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 2, + offset: 1, + }) + .try_collect() + .await + .unwrap(); + assert_eq!(running_task_ids, vec![task_ids[1]]); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 2, + }) + .try_collect() + .await + .unwrap(); + assert_eq!(running_task_ids, vec![]); + + // Finish 2nd task only + event_store + .save_events( + &task_ids[1], + vec![TaskEventFinished { + event_time: Utc::now(), + task_id: task_ids[1], + outcome: TaskOutcome::Success(TaskResult::Empty), + } + .into()], + ) + .await + .unwrap(); + + // Should see only the first running task + + let running_count = event_store.get_count_running_tasks().await.unwrap(); + assert_eq!(running_count, 1); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert_eq!(running_task_ids, vec![task_ids[0]]); + + // Requeue 1st task + event_store + .save_events( + &task_ids[0], + vec![TaskEventRequeued { + event_time: Utc::now(), + task_id: task_ids[0], + } + .into()], + ) + .await + .unwrap(); + + // No running task after this, just 2 queued (#0, #2) and 1 finished (#1) + + let running_count = event_store.get_count_running_tasks().await.unwrap(); + assert_eq!(running_count, 0); + + let running_task_ids: Vec<_> = event_store + .get_running_tasks(PaginationOpts { + limit: 100, + offset: 0, + }) + .try_collect() + .await + .unwrap(); + assert!(running_task_ids.is_empty()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/task-system/sqlite/.sqlx/query-5aad9a609647d2df2ffc804adea19c35d595ace74c7cd5fb3f4f6b9ca8c61caa.json b/src/infra/task-system/sqlite/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json similarity index 51% rename from src/infra/task-system/sqlite/.sqlx/query-5aad9a609647d2df2ffc804adea19c35d595ace74c7cd5fb3f4f6b9ca8c61caa.json rename to src/infra/task-system/sqlite/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json index c0ae39215..2be1a9526 100644 --- a/src/infra/task-system/sqlite/.sqlx/query-5aad9a609647d2df2ffc804adea19c35d595ace74c7cd5fb3f4f6b9ca8c61caa.json +++ b/src/infra/task-system/sqlite/.sqlx/query-07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd.json @@ -1,10 +1,10 @@ { "db_name": "SQLite", - "query": "\n SELECT COUNT(event_id) as count from task_events\n ", + "query": "\n SELECT 
COUNT(event_id) AS events_count from task_events\n ", "describe": { "columns": [ { - "name": "count", + "name": "events_count", "ordinal": 0, "type_info": "Integer" } @@ -16,5 +16,5 @@ false ] }, - "hash": "5aad9a609647d2df2ffc804adea19c35d595ace74c7cd5fb3f4f6b9ca8c61caa" + "hash": "07c1f48769f4f93c55bb979ca5dc685b8bd7a95cf840f28ae8224bdefa08e1cd" } diff --git a/src/infra/task-system/sqlite/.sqlx/query-38a40ce900e6e3b5438013281855572b5806286e0d8dceeefce896e73cb2f5b2.json b/src/infra/task-system/sqlite/.sqlx/query-38a40ce900e6e3b5438013281855572b5806286e0d8dceeefce896e73cb2f5b2.json new file mode 100644 index 000000000..6c9bb479a --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-38a40ce900e6e3b5438013281855572b5806286e0d8dceeefce896e73cb2f5b2.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(task_id) AS tasks_count FROM tasks\n WHERE task_status == 'running'\n ", + "describe": { + "columns": [ + { + "name": "tasks_count", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "38a40ce900e6e3b5438013281855572b5806286e0d8dceeefce896e73cb2f5b2" +} diff --git a/src/infra/task-system/sqlite/.sqlx/query-febb2c5ca05ea2a4f9d20621a6d786269d9b83db21b2efcef8273c01f902859c.json b/src/infra/task-system/sqlite/.sqlx/query-40fa470dd5959726f1df8747e2da944fcd5644bfbc3fca9504655d54eb1fa5f4.json similarity index 55% rename from src/infra/task-system/sqlite/.sqlx/query-febb2c5ca05ea2a4f9d20621a6d786269d9b83db21b2efcef8273c01f902859c.json rename to src/infra/task-system/sqlite/.sqlx/query-40fa470dd5959726f1df8747e2da944fcd5644bfbc3fca9504655d54eb1fa5f4.json index 05e4eac67..38b365fd5 100644 --- a/src/infra/task-system/sqlite/.sqlx/query-febb2c5ca05ea2a4f9d20621a6d786269d9b83db21b2efcef8273c01f902859c.json +++ b/src/infra/task-system/sqlite/.sqlx/query-40fa470dd5959726f1df8747e2da944fcd5644bfbc3fca9504655d54eb1fa5f4.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n INSERT INTO tasks(created_time) VALUES($1) RETURNING task_id as \"task_id: _\"\n ", + "query": "\n INSERT INTO task_ids(created_time) VALUES($1) RETURNING task_id as \"task_id: _\"\n ", "describe": { "columns": [ { @@ -16,5 +16,5 @@ false ] }, - "hash": "febb2c5ca05ea2a4f9d20621a6d786269d9b83db21b2efcef8273c01f902859c" + "hash": "40fa470dd5959726f1df8747e2da944fcd5644bfbc3fca9504655d54eb1fa5f4" } diff --git a/src/infra/task-system/sqlite/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json b/src/infra/task-system/sqlite/.sqlx/query-48dbe4ede5f5e3c6c69745e57fb509fa652e66cc0e4a89dfa76a24563d74725c.json similarity index 54% rename from src/infra/task-system/sqlite/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json rename to src/infra/task-system/sqlite/.sqlx/query-48dbe4ede5f5e3c6c69745e57fb509fa652e66cc0e4a89dfa76a24563d74725c.json index 0ec1c0651..f2c5ab43d 100644 --- a/src/infra/task-system/sqlite/.sqlx/query-ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037.json +++ b/src/infra/task-system/sqlite/.sqlx/query-48dbe4ede5f5e3c6c69745e57fb509fa652e66cc0e4a89dfa76a24563d74725c.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n SELECT task_id\n FROM task_events\n WHERE dataset_id = $1 AND event_type = 'TaskEventCreated'\n ORDER BY task_id DESC LIMIT $2 OFFSET $3\n ", + "query": "\n SELECT task_id\n FROM tasks\n WHERE dataset_id = $1\n ORDER BY task_id DESC\n LIMIT $2 OFFSET $3\n ", "describe": { "columns": [ { @@ -16,5 +16,5 @@ false ] }, - "hash": 
"ca5768b1007209e1141cd43a4a6bc6cfd8f92620a747be2459aabf30dc9e3037" + "hash": "48dbe4ede5f5e3c6c69745e57fb509fa652e66cc0e4a89dfa76a24563d74725c" } diff --git a/src/infra/task-system/sqlite/.sqlx/query-515e14214498e1405deee168d1c297e4fe3946992049f4aa29fa513134c91de7.json b/src/infra/task-system/sqlite/.sqlx/query-515e14214498e1405deee168d1c297e4fe3946992049f4aa29fa513134c91de7.json deleted file mode 100644 index 393fdeaa4..000000000 --- a/src/infra/task-system/sqlite/.sqlx/query-515e14214498e1405deee168d1c297e4fe3946992049f4aa29fa513134c91de7.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n SELECT COUNT(event_id) as count FROM task_events\n WHERE dataset_id = $1\n ", - "describe": { - "columns": [ - { - "name": "count", - "ordinal": 0, - "type_info": "Integer" - } - ], - "parameters": { - "Right": 1 - }, - "nullable": [ - false - ] - }, - "hash": "515e14214498e1405deee168d1c297e4fe3946992049f4aa29fa513134c91de7" -} diff --git a/src/infra/task-system/sqlite/.sqlx/query-5b72d0c71b80bd8d776a5683203bb767d6b835270a18a778762f4378e0841993.json b/src/infra/task-system/sqlite/.sqlx/query-5b72d0c71b80bd8d776a5683203bb767d6b835270a18a778762f4378e0841993.json new file mode 100644 index 000000000..8d5ccbc81 --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-5b72d0c71b80bd8d776a5683203bb767d6b835270a18a778762f4378e0841993.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT task_id FROM tasks\n WHERE task_status == 'running'\n ORDER BY task_id ASC\n LIMIT $1 OFFSET $2\n ", + "describe": { + "columns": [ + { + "name": "task_id", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false + ] + }, + "hash": "5b72d0c71b80bd8d776a5683203bb767d6b835270a18a778762f4378e0841993" +} diff --git a/src/infra/task-system/sqlite/.sqlx/query-865176ffdd9de03785cf71bb4f3cd52d869dbd84579828b63d1f7ef76ae04a2a.json b/src/infra/task-system/sqlite/.sqlx/query-865176ffdd9de03785cf71bb4f3cd52d869dbd84579828b63d1f7ef76ae04a2a.json new file mode 100644 index 000000000..c8c1bd0d8 --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-865176ffdd9de03785cf71bb4f3cd52d869dbd84579828b63d1f7ef76ae04a2a.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(task_id) AS tasks_count FROM tasks\n WHERE dataset_id = $1\n ", + "describe": { + "columns": [ + { + "name": "tasks_count", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 1 + }, + "nullable": [ + false + ] + }, + "hash": "865176ffdd9de03785cf71bb4f3cd52d869dbd84579828b63d1f7ef76ae04a2a" +} diff --git a/src/infra/task-system/sqlite/.sqlx/query-866f5c19c29dfb5e67c8909cd6e0b50780367c74e1b7a9e5c9cc1de6f7ece345.json b/src/infra/task-system/sqlite/.sqlx/query-866f5c19c29dfb5e67c8909cd6e0b50780367c74e1b7a9e5c9cc1de6f7ece345.json new file mode 100644 index 000000000..3af2899be --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-866f5c19c29dfb5e67c8909cd6e0b50780367c74e1b7a9e5c9cc1de6f7ece345.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT task_id FROM tasks\n WHERE task_status = 'queued'\n ORDER BY task_id ASC\n LIMIT 1\n ", + "describe": { + "columns": [ + { + "name": "task_id", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "866f5c19c29dfb5e67c8909cd6e0b50780367c74e1b7a9e5c9cc1de6f7ece345" +} diff --git a/src/infra/task-system/sqlite/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json 
b/src/infra/task-system/sqlite/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json new file mode 100644 index 000000000..5b83d5a9f --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n UPDATE tasks\n SET task_status = $2\n WHERE task_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 2 + }, + "nullable": [] + }, + "hash": "9b399a869b6bfa061f653dca50c72c6a96d45bbb7f771f4f887886f817eceac2" +} diff --git a/src/infra/task-system/sqlite/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json b/src/infra/task-system/sqlite/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json new file mode 100644 index 000000000..48509fd47 --- /dev/null +++ b/src/infra/task-system/sqlite/.sqlx/query-a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n INSERT INTO tasks (task_id, dataset_id, task_status)\n VALUES ($1, $2, $3)\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 3 + }, + "nullable": [] + }, + "hash": "a3fe8db1c38d027b8e26c19f1c022d98c987ebf173bb64e3bde6b5d32928beee" +} diff --git a/src/infra/task-system/sqlite/src/lib.rs b/src/infra/task-system/sqlite/src/lib.rs index 521faa011..20b6456f6 100644 --- a/src/infra/task-system/sqlite/src/lib.rs +++ b/src/infra/task-system/sqlite/src/lib.rs @@ -14,6 +14,6 @@ // Re-exports pub use kamu_task_system as domain; -mod sqlite_task_system_event_store; +mod sqlite_task_event_store; -pub use sqlite_task_system_event_store::*; +pub use sqlite_task_event_store::*; diff --git a/src/infra/task-system/sqlite/src/sqlite_task_system_event_store.rs b/src/infra/task-system/sqlite/src/sqlite_task_event_store.rs similarity index 53% rename from src/infra/task-system/sqlite/src/sqlite_task_system_event_store.rs rename to src/infra/task-system/sqlite/src/sqlite_task_event_store.rs index a8a04381c..d2b0be4de 100644 --- a/src/infra/task-system/sqlite/src/sqlite_task_system_event_store.rs +++ b/src/infra/task-system/sqlite/src/sqlite_task_event_store.rs @@ -8,12 +8,12 @@ // by the Apache License, Version 2.0. 
use chrono::Utc; -use database_common::{TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::*; use futures::TryStreamExt; use kamu_task_system::*; use opendatafabric::DatasetID; -use sqlx::{FromRow, QueryBuilder}; +use sqlx::{FromRow, QueryBuilder, Sqlite}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -22,27 +22,110 @@ pub struct SqliteTaskSystemEventStore { } #[component(pub)] -#[interface(dyn TaskSystemEventStore)] +#[interface(dyn TaskEventStore)] impl SqliteTaskSystemEventStore { pub fn new(transaction: TransactionRef) -> Self { Self { transaction: transaction.into(), } } + async fn save_task_updates_from_events( + &self, + tr: &mut database_common::TransactionGuard<'_, Sqlite>, + events: &[TaskEvent], + ) -> Result<(), SaveEventsError> { + for event in events { + let connection_mut = tr.connection_mut().await?; + + let event_task_id: i64 = (event.task_id()).try_into().unwrap(); + + if let TaskEvent::TaskCreated(e) = &event { + let maybe_dataset_id = e.logical_plan.dataset_id().map(ToString::to_string); + sqlx::query!( + r#" + INSERT INTO tasks (task_id, dataset_id, task_status) + VALUES ($1, $2, $3) + "#, + event_task_id, + maybe_dataset_id, + TaskStatus::Queued, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + /* Existing task, update status */ + else { + let new_status = event.new_status(); + + sqlx::query!( + r#" + UPDATE tasks + SET task_status = $2 + WHERE task_id = $1 + "#, + event_task_id, + new_status, + ) + .execute(connection_mut) + .await + .map_err(|e| SaveEventsError::Internal(e.int_err()))?; + } + } + + Ok(()) + } + + async fn save_events_impl( + &self, + tr: &mut database_common::TransactionGuard<'_, Sqlite>, + events: &[TaskEvent], + ) -> Result { + let connection_mut = tr.connection_mut().await?; + + #[derive(FromRow)] + struct ResultRow { + event_id: i64, + } + + let mut query_builder = QueryBuilder::::new( + r#" + INSERT INTO task_events (task_id, event_time, event_type, event_payload) + "#, + ); + + query_builder.push_values(events, |mut b, event| { + let event_task_id: i64 = event.task_id().try_into().unwrap(); + b.push_bind(event_task_id); + b.push_bind(event.event_time()); + b.push_bind(event.typename()); + b.push_bind(serde_json::to_value(event).unwrap()); + }); + + query_builder.push("RETURNING event_id"); + + let rows = query_builder + .build_query_as::() + .fetch_all(connection_mut) + .await + .int_err()?; + + let last_event_id = rows.last().unwrap().event_id; + Ok(EventID::new(last_event_id)) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] impl EventStore for SqliteTaskSystemEventStore { - async fn get_events(&self, task_id: &TaskID, opts: GetEventsOpts) -> EventStream { - let mut tr = self.transaction.lock().await; - - let task_id: i64 = (*task_id).into(); + fn get_events(&self, task_id: &TaskID, opts: GetEventsOpts) -> EventStream { + let task_id: i64 = (*task_id).try_into().unwrap(); let maybe_from_id = opts.from.map(EventID::into_inner); let maybe_to_id = opts.to.map(EventID::into_inner); Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; let connection_mut = tr .connection_mut() .await?; @@ -91,38 +174,11 @@ impl EventStore for SqliteTaskSystemEventStore { } let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; - - #[derive(FromRow)] - struct ResultRow { - event_id: i64, - } - let mut query_builder = QueryBuilder::::new( - r#" - INSERT INTO task_events (task_id, dataset_id, event_time, event_type, event_payload) - "#, - ); - - query_builder.push_values(events, |mut b, event| { - let event_task_id: i64 = event.task_id().into(); - b.push_bind(event_task_id); - b.push_bind(event.dataset_id().map(ToString::to_string)); - b.push_bind(event.event_time()); - b.push_bind(event.typename()); - b.push_bind(serde_json::to_value(event).unwrap()); - }); + self.save_task_updates_from_events(&mut tr, &events).await?; + let last_event_id = self.save_events_impl(&mut tr, &events).await?; - query_builder.push("RETURNING event_id"); - - let rows = query_builder - .build_query_as::() - .fetch_all(connection_mut) - .await - .int_err()?; - let last_event_id = rows.last().unwrap().event_id; - - Ok(EventID::new(last_event_id)) + Ok(last_event_id) } async fn len(&self) -> Result { @@ -131,14 +187,14 @@ impl EventStore for SqliteTaskSystemEventStore { let result = sqlx::query!( r#" - SELECT COUNT(event_id) as count from task_events + SELECT COUNT(event_id) AS events_count from task_events "#, ) .fetch_one(connection_mut) .await .int_err()?; - let count = usize::try_from(result.count).int_err()?; + let count = usize::try_from(result.events_count).int_err()?; Ok(count) } } @@ -146,7 +202,7 @@ impl EventStore for SqliteTaskSystemEventStore { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl TaskSystemEventStore for SqliteTaskSystemEventStore { +impl TaskEventStore for SqliteTaskSystemEventStore { /// Generates new unique task identifier async fn new_task_id(&self) -> Result { let mut tr = self.transaction.lock().await; @@ -163,7 +219,7 @@ impl TaskSystemEventStore for SqliteTaskSystemEventStore { let result = sqlx::query_as!( NewTask, r#" - INSERT INTO tasks(created_time) VALUES($1) RETURNING task_id as "task_id: _" + INSERT INTO task_ids(created_time) VALUES($1) RETURNING task_id as "task_id: _" "#, created_time ) @@ -171,20 +227,96 @@ impl TaskSystemEventStore for SqliteTaskSystemEventStore { .await .int_err()?; - Ok(TaskID::new(result.task_id)) + Ok(TaskID::try_from(result.task_id).unwrap()) + } + + /// Attempts to get the earliest queued task, if any + async fn try_get_queued_task(&self) -> Result, InternalError> { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let maybe_task_id = sqlx::query!( + r#" + SELECT task_id FROM tasks + WHERE task_status = 'queued' + ORDER BY task_id ASC + LIMIT 1 + "#, + ) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) + .fetch_optional(connection_mut) + .await + .map_err(ErrorIntoInternal::int_err)?; + + Ok(maybe_task_id) + } + + /// Returns list of tasks, which are in Running state, from earliest to + /// latest + fn get_running_tasks(&self, pagination: PaginationOpts) -> TaskIDStream { + Box::pin(async_stream::stream! 
{ + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query!( + r#" + SELECT task_id FROM tasks + WHERE task_status == 'running' + ORDER BY task_id ASC + LIMIT $1 OFFSET $2 + "#, + limit, + offset, + ) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + while let Some(task_id) = query_stream.try_next().await? { + yield Ok(task_id); + } + }) + } + + /// Returns total number of tasks, which are in Running state + async fn get_count_running_tasks(&self) -> Result { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let result = sqlx::query!( + r#" + SELECT COUNT(task_id) AS tasks_count FROM tasks + WHERE task_status == 'running' + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + let count = usize::try_from(result.tasks_count).int_err()?; + Ok(count) } /// Returns page of the tasks associated with the specified dataset in /// reverse chronological order based on creation time - async fn get_tasks_by_dataset( + fn get_tasks_by_dataset( &self, dataset_id: &DatasetID, - pagination: TaskPaginationOpts, + pagination: PaginationOpts, ) -> TaskIDStream { - let mut tr = self.transaction.lock().await; let dataset_id = dataset_id.to_string(); Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; let limit = i64::try_from(pagination.limit).int_err()?; @@ -193,15 +325,19 @@ impl TaskSystemEventStore for SqliteTaskSystemEventStore { let mut query_stream = sqlx::query!( r#" SELECT task_id - FROM task_events - WHERE dataset_id = $1 AND event_type = 'TaskEventCreated' - ORDER BY task_id DESC LIMIT $2 OFFSET $3 + FROM tasks + WHERE dataset_id = $1 + ORDER BY task_id DESC + LIMIT $2 OFFSET $3 "#, dataset_id, limit, offset, ) - .try_map(|event_row| Ok(TaskID::new(event_row.task_id))) + .try_map(|event_row| { + let task_id = event_row.task_id; + Ok(TaskID::try_from(task_id).unwrap()) + }) .fetch(connection_mut) .map_err(ErrorIntoInternal::int_err); @@ -223,7 +359,7 @@ impl TaskSystemEventStore for SqliteTaskSystemEventStore { let result = sqlx::query!( r#" - SELECT COUNT(event_id) as count FROM task_events + SELECT COUNT(task_id) AS tasks_count FROM tasks WHERE dataset_id = $1 "#, dataset_id_str @@ -232,7 +368,7 @@ impl TaskSystemEventStore for SqliteTaskSystemEventStore { .await .int_err()?; - let count = usize::try_from(result.count).int_err()?; + let count = usize::try_from(result.tasks_count).int_err()?; Ok(count) } } diff --git a/src/infra/task-system/sqlite/tests/tests/test_sqlite_task_system_event_store.rs b/src/infra/task-system/sqlite/tests/tests/test_sqlite_task_system_event_store.rs index c1efc5913..514509542 100644 --- a/src/infra/task-system/sqlite/tests/tests/test_sqlite_task_system_event_store.rs +++ b/src/infra/task-system/sqlite/tests/tests/test_sqlite_task_system_event_store.rs @@ -55,6 +55,30 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = sqlite, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_single_task, + harness = SqliteTaskSystemEventStoreHarness +); + 
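The new `TaskEventStore` queries above (`try_get_queued_task`, `get_running_tasks`, `get_count_running_tasks`) are what let the executor rediscover unfinished work from the database instead of an in-memory queue. A hedged sketch of a consumer; the recovery loop, page size, and import paths are assumptions, only the store methods and `PaginationOpts` come from the diff:

```rust
use std::sync::Arc;

use database_common::PaginationOpts;
use futures::TryStreamExt;
use internal_error::InternalError;
use kamu_task_system::{TaskEventStore, TaskID};

/// Illustrative recovery pass over tasks left in the `running` state.
async fn recover_running_tasks(
    task_event_store: Arc<dyn TaskEventStore>,
) -> Result<(), InternalError> {
    const PAGE_SIZE: usize = 100; // assumed batch size

    let total = task_event_store.get_count_running_tasks().await?;

    let mut offset = 0;
    while offset < total {
        let task_ids: Vec<TaskID> = task_event_store
            .get_running_tasks(PaginationOpts {
                limit: PAGE_SIZE,
                offset,
            })
            .try_collect()
            .await?;

        for task_id in &task_ids {
            // Domain-specific: re-attach to or re-schedule the task here.
            tracing::debug!(?task_id, "Recovered running task after restart");
        }

        offset += PAGE_SIZE;
    }

    Ok(())
}
```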
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_task_system_repo_tests::test_event_store_try_get_queued_multiple_tasks, + harness = SqliteTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = kamu_task_system_repo_tests::test_event_store_get_running_tasks, + harness = SqliteTaskSystemEventStoreHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct SqliteTaskSystemEventStoreHarness { catalog: Catalog, } diff --git a/src/utils/database-common/src/entities.rs b/src/utils/database-common/src/entities.rs index 19745380a..07d9a0ff4 100644 --- a/src/utils/database-common/src/entities.rs +++ b/src/utils/database-common/src/entities.rs @@ -7,7 +7,12 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -pub struct DatabasePaginationOpts { - pub limit: i64, - pub offset: i64, +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct PaginationOpts { + pub limit: usize, + pub offset: usize, } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/event-sourcing/src/aggregate.rs b/src/utils/event-sourcing/src/aggregate.rs index 017852d30..b203142ab 100644 --- a/src/utils/event-sourcing/src/aggregate.rs +++ b/src/utils/event-sourcing/src/aggregate.rs @@ -133,15 +133,13 @@ where ) -> Result> { use tokio_stream::StreamExt; - let mut event_stream = event_store - .get_events( - &query, - GetEventsOpts { - from: None, - to: opts.as_of_event, - }, - ) - .await; + let mut event_stream = event_store.get_events( + &query, + GetEventsOpts { + from: None, + to: opts.as_of_event, + }, + ); let (event_id, event) = match event_stream.next().await { Some(Ok(v)) => v, @@ -197,15 +195,13 @@ where let prev_stored_event = self.last_stored_event; - let mut event_stream = event_store - .get_events( - &self.query, - GetEventsOpts { - from: prev_stored_event, - to: opts.as_of_event, - }, - ) - .await; + let mut event_stream = event_store.get_events( + &self.query, + GetEventsOpts { + from: prev_stored_event, + to: opts.as_of_event, + }, + ); let mut num_events = 1; diff --git a/src/utils/event-sourcing/src/event_store.rs b/src/utils/event-sourcing/src/event_store.rs index 849a6a727..f4967928d 100644 --- a/src/utils/event-sourcing/src/event_store.rs +++ b/src/utils/event-sourcing/src/event_store.rs @@ -18,11 +18,7 @@ use crate::{EventID, Projection}; #[async_trait::async_trait] pub trait EventStore: Send + Sync { /// Returns the event history of an aggregate in chronological order - async fn get_events( - &self, - query: &Proj::Query, - opts: GetEventsOpts, - ) -> EventStream; + fn get_events(&self, query: &Proj::Query, opts: GetEventsOpts) -> EventStream; /// Persists a series of events /// diff --git a/src/utils/event-sourcing/src/inmem_event_store.rs b/src/utils/event-sourcing/src/inmem_event_store.rs index af7dc2f0c..5516c31e5 100644 --- a/src/utils/event-sourcing/src/inmem_event_store.rs +++ b/src/utils/event-sourcing/src/inmem_event_store.rs @@ -43,11 +43,7 @@ impl> EventStore 
Ok(self.state.lock().unwrap().events_count()) } - async fn get_events( - &self, - query: &Proj::Query, - opts: GetEventsOpts, - ) -> EventStream { + fn get_events(&self, query: &Proj::Query, opts: GetEventsOpts) -> EventStream { let query = query.clone(); // TODO: This should be a buffered stream so we don't lock per event diff --git a/src/utils/event-sourcing/tests/test_aggregate.rs b/src/utils/event-sourcing/tests/test_aggregate.rs index 66d835720..a2c2438cb 100644 --- a/src/utils/event-sourcing/tests/test_aggregate.rs +++ b/src/utils/event-sourcing/tests/test_aggregate.rs @@ -51,7 +51,7 @@ impl CalcEventStore { #[async_trait::async_trait] impl EventStore for CalcEventStore { - async fn get_events(&self, _query: &(), _opts: GetEventsOpts) -> EventStream { + fn get_events(&self, _query: &(), _opts: GetEventsOpts) -> EventStream { use futures::StreamExt; Box::pin( tokio_stream::iter(self.0.lock().unwrap().clone()) diff --git a/src/utils/messaging-outbox/src/repos/outbox_message_consumption_repository.rs b/src/utils/messaging-outbox/src/repos/outbox_message_consumption_repository.rs index 603e88ea3..dabea3e39 100644 --- a/src/utils/messaging-outbox/src/repos/outbox_message_consumption_repository.rs +++ b/src/utils/messaging-outbox/src/repos/outbox_message_consumption_repository.rs @@ -16,9 +16,7 @@ use crate::OutboxMessageID; #[async_trait::async_trait] pub trait OutboxMessageConsumptionRepository: Send + Sync { - async fn list_consumption_boundaries( - &self, - ) -> Result; + fn list_consumption_boundaries(&self) -> OutboxMessageConsumptionBoundariesStream; async fn find_consumption_boundary( &self, diff --git a/src/utils/messaging-outbox/src/repos/outbox_message_repository.rs b/src/utils/messaging-outbox/src/repos/outbox_message_repository.rs index 77442b7c1..666af79dd 100644 --- a/src/utils/messaging-outbox/src/repos/outbox_message_repository.rs +++ b/src/utils/messaging-outbox/src/repos/outbox_message_repository.rs @@ -17,12 +17,12 @@ use crate::{NewOutboxMessage, OutboxMessage, OutboxMessageID}; pub trait OutboxMessageRepository: Send + Sync { async fn push_message(&self, message: NewOutboxMessage) -> Result<(), InternalError>; - async fn get_producer_messages( + fn get_producer_messages( &self, producer_name: &str, above_id: OutboxMessageID, batch_size: usize, - ) -> Result; + ) -> OutboxMessageStream; async fn get_latest_message_ids_by_producer( &self, diff --git a/src/utils/messaging-outbox/src/services/outbox_transactional_processor.rs b/src/utils/messaging-outbox/src/services/outbox_transactional_processor.rs index 9c75d18ea..5ce7143b7 100644 --- a/src/utils/messaging-outbox/src/services/outbox_transactional_processor.rs +++ b/src/utils/messaging-outbox/src/services/outbox_transactional_processor.rs @@ -75,14 +75,17 @@ impl OutboxTransactionalProcessor { ) } - #[tracing::instrument(level = "debug", skip_all)] - pub async fn run(&self) -> Result<(), InternalError> { + #[tracing::instrument(level = "info", skip_all)] + pub async fn pre_run(&self) -> Result<(), InternalError> { // Trace current routes self.debug_message_routes(); // Make sure consumption records represent the routes - self.init_consumption_records().await?; + self.init_consumption_records().await + } + #[tracing::instrument(level = "info", skip_all)] + pub async fn run(&self) -> Result<(), InternalError> { // Main relay loop loop { self.run_relay_iteration().await?; @@ -95,12 +98,6 @@ impl OutboxTransactionalProcessor { // To be used by tests only! 
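The split of `run()` into `pre_run()` + `run()` above is the pre-run phase mentioned in the changelog: one-time initialization (tracing the routes, seeding consumption records) now happens before any component enters its long-running loop, avoiding startup races between executors. A minimal sketch of the intended call order; the wrapper function and import paths are assumptions, only `pre_run()`/`run()` come from the diff:

```rust
use std::sync::Arc;

use internal_error::InternalError;
use messaging_outbox::OutboxTransactionalProcessor;

/// Illustrative startup wrapper (not part of the diff).
async fn run_outbox_processor(
    processor: Arc<OutboxTransactionalProcessor>,
) -> Result<(), InternalError> {
    // Phase 1: initialize consumption records before anything starts polling.
    processor.pre_run().await?;

    // Phase 2: the main relay loop; only returns on error.
    processor.run().await
}
```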
#[tracing::instrument(level = "debug", skip_all)] pub async fn run_single_iteration_only(&self) -> Result<(), InternalError> { - // Trace current routes - self.debug_message_routes(); - - // Make sure consumption records represent the routes - self.init_consumption_records().await?; - // Run single iteration instead of a loop self.run_relay_iteration().await?; Ok(()) @@ -121,7 +118,6 @@ impl OutboxTransactionalProcessor { use futures::TryStreamExt; let consumptions = outbox_consumption_repository .list_consumption_boundaries() - .await? .try_collect::>().await?; // Build a set of producer-consumer pairs that already exist in the database @@ -233,11 +229,10 @@ impl OutboxTransactionalProcessor { .transactional_with( |outbox_consumption_repository: Arc| async move { let consumptions_stream = outbox_consumption_repository - .list_consumption_boundaries() - .await?; + .list_consumption_boundaries(); - use futures::TryStreamExt; - consumptions_stream.try_collect::>().await + use futures::TryStreamExt; + consumptions_stream.try_collect::>().await }, ) .await @@ -408,9 +403,11 @@ impl ProducerRelayJob { DatabaseTransactionRunner::new(self.catalog.clone()) .transactional_with( |outbox_message_repository: Arc| async move { - let messages_stream = outbox_message_repository - .get_producer_messages(&self.producer_name, above_id, batch_size) - .await?; + let messages_stream = outbox_message_repository.get_producer_messages( + &self.producer_name, + above_id, + batch_size, + ); use futures::TryStreamExt; messages_stream.try_collect::>().await diff --git a/src/utils/messaging-outbox/tests/tests/test_dispatching_outbox_impl.rs b/src/utils/messaging-outbox/tests/tests/test_dispatching_outbox_impl.rs index a3892e984..25ff6bfd3 100644 --- a/src/utils/messaging-outbox/tests/tests/test_dispatching_outbox_impl.rs +++ b/src/utils/messaging-outbox/tests/tests/test_dispatching_outbox_impl.rs @@ -255,8 +255,6 @@ impl DispatchingOutboxHarness { let outbox_messages: Vec<_> = self .outbox_message_repository .get_producer_messages(producer_name, OutboxMessageID::new(0), 10) - .await - .unwrap() .try_collect() .await .unwrap(); diff --git a/src/utils/messaging-outbox/tests/tests/test_outbox_transactional_processor.rs b/src/utils/messaging-outbox/tests/tests/test_outbox_transactional_processor.rs index f7f8af8fb..a016c50c9 100644 --- a/src/utils/messaging-outbox/tests/tests/test_outbox_transactional_processor.rs +++ b/src/utils/messaging-outbox/tests/tests/test_outbox_transactional_processor.rs @@ -49,6 +49,7 @@ async fn test_deliver_messages_of_one_type() { }; let harness = TransactionalOutboxProcessorHarness::new(); + harness.outbox_processor.pre_run().await.unwrap(); harness .outbox @@ -63,7 +64,14 @@ async fn test_deliver_messages_of_one_type() { // Posted, but not delivered yet! harness.check_delivered_messages(&[], &[], &[], &[]); - harness.check_consumption_boundaries(&[]).await; + harness + .check_consumption_boundaries(&[ + (TEST_PRODUCER_A, "TestMessageConsumerA", 0), + (TEST_PRODUCER_B, "TestMessageConsumerB", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC1", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC2", 0), + ]) + .await; // Run relay iteration harness @@ -96,6 +104,7 @@ async fn test_deliver_messages_of_two_types() { }; let harness = TransactionalOutboxProcessorHarness::new(); + harness.outbox_processor.pre_run().await.unwrap(); harness .outbox @@ -110,7 +119,14 @@ async fn test_deliver_messages_of_two_types() { // Posted, but not delivered yet! 
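Note the shape change for the repository reads above: `list_consumption_boundaries()` and `get_producer_messages()` are no longer `async fn`s returning a `Result`-wrapped stream, they return the stream directly, so callers drop one `.await?` and simply collect. A hedged usage sketch; import paths and the helper are illustrative:

```rust
use std::sync::Arc;

use futures::TryStreamExt;
use internal_error::InternalError;
use messaging_outbox::{OutboxMessage, OutboxMessageID, OutboxMessageRepository};

/// Illustrative wrapper: load up to `batch_size` messages above `above_id`.
async fn load_producer_batch(
    repo: Arc<dyn OutboxMessageRepository>,
    producer_name: &str,
    above_id: OutboxMessageID,
    batch_size: usize,
) -> Result<Vec<OutboxMessage>, InternalError> {
    repo.get_producer_messages(producer_name, above_id, batch_size)
        .try_collect()
        .await
}
```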
harness.check_delivered_messages(&[], &[], &[], &[]); - harness.check_consumption_boundaries(&[]).await; + harness + .check_consumption_boundaries(&[ + (TEST_PRODUCER_A, "TestMessageConsumerA", 0), + (TEST_PRODUCER_B, "TestMessageConsumerB", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC1", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC2", 0), + ]) + .await; // Run relay iteration harness @@ -144,6 +160,7 @@ async fn test_deliver_messages_multiple_consumers() { }; let harness = TransactionalOutboxProcessorHarness::new(); + harness.outbox_processor.pre_run().await.unwrap(); harness .outbox @@ -158,7 +175,14 @@ async fn test_deliver_messages_multiple_consumers() { // Posted, but not delivered yet! harness.check_delivered_messages(&[], &[], &[], &[]); - harness.check_consumption_boundaries(&[]).await; + harness + .check_consumption_boundaries(&[ + (TEST_PRODUCER_A, "TestMessageConsumerA", 0), + (TEST_PRODUCER_B, "TestMessageConsumerB", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC1", 0), + (TEST_PRODUCER_C, "TestMessageConsumerC2", 0), + ]) + .await; // Run relay iteration harness @@ -190,6 +214,7 @@ async fn test_deliver_messages_multiple_consumers() { #[test_log::test(tokio::test)] async fn test_deliver_messages_with_partial_consumption() { let harness = TransactionalOutboxProcessorHarness::new(); + harness.outbox_processor.pre_run().await.unwrap(); let message_texts = ["foo", "bar", "baz", "super", "duper"]; for message_text in message_texts { @@ -208,7 +233,7 @@ async fn test_deliver_messages_with_partial_consumption() { // Let's assume some initial partial boundaries harness .outbox_consumption_repository - .create_consumption_boundary(OutboxMessageConsumptionBoundary { + .update_consumption_boundary(OutboxMessageConsumptionBoundary { producer_name: TEST_PRODUCER_C.to_string(), consumer_name: "TestMessageConsumerC1".to_string(), last_consumed_message_id: OutboxMessageID::new(2), @@ -217,7 +242,7 @@ async fn test_deliver_messages_with_partial_consumption() { .unwrap(); harness .outbox_consumption_repository - .create_consumption_boundary(OutboxMessageConsumptionBoundary { + .update_consumption_boundary(OutboxMessageConsumptionBoundary { producer_name: TEST_PRODUCER_C.to_string(), consumer_name: "TestMessageConsumerC2".to_string(), last_consumed_message_id: OutboxMessageID::new(4), @@ -229,6 +254,8 @@ async fn test_deliver_messages_with_partial_consumption() { harness.check_delivered_messages(&[], &[], &[], &[]); harness .check_consumption_boundaries(&[ + (TEST_PRODUCER_A, "TestMessageConsumerA", 0), + (TEST_PRODUCER_B, "TestMessageConsumerB", 0), (TEST_PRODUCER_C, "TestMessageConsumerC1", 2), (TEST_PRODUCER_C, "TestMessageConsumerC2", 4), ]) @@ -352,8 +379,6 @@ impl TransactionalOutboxProcessorHarness { let mut boundaries: Vec<_> = self .outbox_consumption_repository .list_consumption_boundaries() - .await - .unwrap() .try_collect() .await .unwrap(); diff --git a/src/utils/messaging-outbox/tests/tests/test_transactional_outbox_impl.rs b/src/utils/messaging-outbox/tests/tests/test_transactional_outbox_impl.rs index 28f07fb2d..b9affddb9 100644 --- a/src/utils/messaging-outbox/tests/tests/test_transactional_outbox_impl.rs +++ b/src/utils/messaging-outbox/tests/tests/test_transactional_outbox_impl.rs @@ -133,8 +133,6 @@ impl TransactionalOutboxHarness { let outbox_messages: Vec<_> = self .outbox_message_repository .get_producer_messages(producer_name, OutboxMessageID::new(0), 10) - .await - .unwrap() .try_collect() .await .unwrap();
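Finally, the test changes above reflect the new boundary lifecycle: `pre_run()` seeds a consumption boundary at message id 0 for every producer/consumer route, so both the relay and the tests only ever update an existing boundary instead of creating one. A sketch of that update step; the struct fields and `update_consumption_boundary` come from the diff, while the import paths and helper are assumptions:

```rust
use std::sync::Arc;

use messaging_outbox::{
    OutboxMessageConsumptionBoundary,
    OutboxMessageConsumptionRepository,
    OutboxMessageID,
};

/// Illustrative helper: advance a consumer's boundary after a batch is handled.
async fn mark_consumed_up_to(
    repo: Arc<dyn OutboxMessageConsumptionRepository>,
    producer_name: &str,
    consumer_name: &str,
    last_consumed_message_id: OutboxMessageID,
) {
    repo.update_consumption_boundary(OutboxMessageConsumptionBoundary {
        producer_name: producer_name.to_string(),
        consumer_name: consumer_name.to_string(),
        last_consumed_message_id,
    })
    .await
    .expect("boundary row is expected to exist after pre_run()");
}
```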