From 8d6e7806fdb5acc106d32ea94a6ec74e5e456179 Mon Sep 17 00:00:00 2001 From: Simon Popugaev Date: Mon, 18 Mar 2024 14:00:28 +0300 Subject: [PATCH] sharding test scenarios --- scenario.json | 48 ++++++++ scenario1.json | 20 ++++ scenario2.json | 15 +++ scenario3.json | 23 ++++ tests/sharding.rs | 275 +++++++++++++++++++++++++++++++++------------- 5 files changed, 307 insertions(+), 74 deletions(-) create mode 100644 scenario.json create mode 100644 scenario1.json create mode 100644 scenario2.json create mode 100644 scenario3.json diff --git a/scenario.json b/scenario.json new file mode 100644 index 0000000..06e7d9e --- /dev/null +++ b/scenario.json @@ -0,0 +1,48 @@ +{ + "workers_count": 4, + "steps": [ + "StopAllShards", + { + "InvokeAndAwaitWorkersAsync": "Invoke, RestartShardManager, StartShards" + }, + "RestartShardManager", + { + "Sleep": { + "secs": 3, + "nanos": 0 + } + }, + { + "StartShards": 4 + }, + "WaitForInvokeAndAwaitResult", + "StopAllShards", + "RestartShardManager", + { + "StartShards": 4 + }, + "RestartShardManager", + { + "InvokeAndAwaitWorkersAsync": "StartShards, RestartShardManager, Invoke" + }, + "WaitForInvokeAndAwaitResult", + "StopAllShards", + "RestartShardManager", + { + "StartShards": 4 + }, + { + "StopShards": 3 + }, + { + "Sleep": { + "secs": 3, + "nanos": 0 + } + }, + { + "InvokeAndAwaitWorkersAsync": "StartShards(4), StopShards(3), Invoke" + }, + "WaitForInvokeAndAwaitResult" + ] +} diff --git a/scenario1.json b/scenario1.json new file mode 100644 index 0000000..75187da --- /dev/null +++ b/scenario1.json @@ -0,0 +1,20 @@ +{ + "workers_count": 4, + "steps": [ + "StopAllShards", + { + "InvokeAndAwaitWorkersAsync": "Invoke, RestartShardManager, StartShards" + }, + "RestartShardManager", + { + "Sleep": { + "secs": 3, + "nanos": 0 + } + }, + { + "StartShards": 4 + }, + "WaitForInvokeAndAwaitResult" + ] +} diff --git a/scenario2.json b/scenario2.json new file mode 100644 index 0000000..44d0e38 --- /dev/null +++ b/scenario2.json @@ -0,0 +1,15 @@ +{ + "workers_count": 4, + "steps": [ + "StopAllShards", + "RestartShardManager", + { + "StartShards": 4 + }, + "RestartShardManager", + { + "InvokeAndAwaitWorkersAsync": "StartShards, RestartShardManager, Invoke" + }, + "WaitForInvokeAndAwaitResult" + ] +} diff --git a/scenario3.json b/scenario3.json new file mode 100644 index 0000000..0942006 --- /dev/null +++ b/scenario3.json @@ -0,0 +1,23 @@ +{ + "workers_count": 4, + "steps": [ + "StopAllShards", + "RestartShardManager", + { + "StartShards": 4 + }, + { + "StopShards": 3 + }, + { + "Sleep": { + "secs": 3, + "nanos": 0 + } + }, + { + "InvokeAndAwaitWorkersAsync": "StartShards(4), StopShards(3), Invoke" + }, + "WaitForInvokeAndAwaitResult" + ] +} diff --git a/tests/sharding.rs b/tests/sharding.rs index 7ed7bcd..7301718 100644 --- a/tests/sharding.rs +++ b/tests/sharding.rs @@ -7,9 +7,11 @@ use golem_cli::model::InvocationKey; use golem_client::model::VersionedWorkerId; use libtest_mimic::{Arguments, Conclusion, Failed, Trial}; use rand::prelude::*; +use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashSet; -use std::sync::mpsc::Receiver; +use std::path::PathBuf; +use std::sync::mpsc::{Receiver, Sender}; use std::sync::Arc; use std::time::Duration; use testcontainers::clients; @@ -17,14 +19,70 @@ use testcontainers::clients; pub mod cli; pub mod context; -fn run(context: ContextInfo) -> Conclusion { +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Scenario { + pub workers_count: usize, + pub steps: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +enum Step { + StartShards(usize), + StopShards(usize), + StopAllShards, + RestartShardManager, + Sleep(Duration), + InvokeAndAwaitWorkersAsync(String), + WaitForInvokeAndAwaitResult, +} + +// fn default_scenario() -> Scenario { +// Scenario { +// workers_count: 4, +// steps: vec![ +// Step::StopAllShards, +// Step::InvokeAndAwaitWorkersAsync("Invoke, RestartShardManager, StartShards".to_string()), +// Step::RestartShardManager, +// Step::Sleep(Duration::from_secs(3)), +// Step::StartShards(4), +// Step::WaitForInvokeAndAwaitResult, +// Step::StopAllShards, +// Step::RestartShardManager, +// Step::StartShards(4), +// Step::RestartShardManager, +// Step::InvokeAndAwaitWorkersAsync("StartShards, RestartShardManager, Invoke".to_string()), +// Step::WaitForInvokeAndAwaitResult, +// Step::StopAllShards, +// Step::RestartShardManager, +// Step::StartShards(4), +// Step::StopShards(3), +// Step::Sleep(Duration::from_secs(3)), +// Step::InvokeAndAwaitWorkersAsync("StartShards(4), StopShards(3), Invoke".to_string()), +// Step::WaitForInvokeAndAwaitResult, +// ], +// } +// } + +fn read_scenario() -> Scenario { + let file_path_str = + std::env::var("GOLEM_TEST_SCENARIO").unwrap_or("./scenario.json".to_string()); + println!("Reading scenario from {file_path_str}"); + + let path = PathBuf::from(file_path_str); + + let file = std::fs::File::open(&path).unwrap(); + + serde_json::from_reader(file).unwrap() +} + +fn run(context: ContextInfo, env_command_tx: Sender) -> Conclusion { let args = Arguments::from_args(); let context = Arc::new(context); let mut tests = Vec::new(); - tests.append(&mut all(context.clone())); + tests.append(&mut all(context.clone(), env_command_tx)); libtest_mimic::run(&args, tests) } @@ -33,45 +91,46 @@ fn main() -> Result<(), Failed> { env_logger::init(); let (tx, rx) = std::sync::mpsc::channel(); - let (stop_tx, stop_rx) = std::sync::mpsc::channel(); + let (env_command_tx, env_command_rx) = std::sync::mpsc::channel(); let context_handler = std::thread::spawn(move || { let docker = clients::Cli::default(); - let context = Context::start(&docker, EnvConfig::from_env_with_shards(0)).unwrap(); + let context = Context::start(&docker, EnvConfig::from_env_with_shards(3)).unwrap(); let context_info = context.info(); tx.send(context_info).unwrap(); - make_env_unstable(context, stop_rx); + env_handler(context, env_command_rx); drop(docker); }); let context_info = rx.recv().unwrap(); - let res = run(context_info); + let res = run(context_info, env_command_tx); - stop_tx.send(()).unwrap(); context_handler.join().unwrap(); res.exit() } -pub fn all(context: Arc) -> Vec { +fn all(context: Arc, env_command_tx: Sender) -> Vec { let cli = CliLive::make(&context).unwrap().with_long_args(); - let ctx = (context, cli); + let ctx = (context, read_scenario(), env_command_tx, cli); vec![Trial::test_in_context( format!("service_is_responsive_to_shard_changes"), - ctx.clone(), + ctx, service_is_responsive_to_shard_changes, )] } -enum Command { - StartShard, - StopShard, +enum EnvCommand { + StartShards(usize), + StopShards(usize), + StopAllShards, RestartShardManager, + Stop, } fn start_shard(context: &mut Context) { @@ -109,6 +168,12 @@ fn start_shard(context: &mut Context) { } } +fn start_shards(context: &mut Context, n: usize) { + for _ in 1..=n { + start_shard(context) + } +} + fn stop_shard(context: &mut Context) { let len = context.worker_executors.worker_executors.len(); @@ -122,6 +187,16 @@ fn stop_shard(context: &mut Context) { drop(we) // Not needed. Just making it explicit; } +fn stop_shards(context: &mut Context, n: usize) { + for _ in 1..=n { + stop_shard(context) + } +} + +fn stop_all_shards(context: &mut Context) { + stop_shards(context, context.worker_executors.worker_executors.len()) +} + fn reload_shard_manager(context: &mut Context) { let old_shard_manager = context.shard_manager.take(); drop(old_shard_manager); // Important! We should stop the old one first. @@ -133,44 +208,36 @@ fn reload_shard_manager(context: &mut Context) { } } -fn make_env_unstable(context: Context, stop_rx: Receiver<()>) { +fn env_handler(context: Context, command_rx: Receiver) { let mut context = context; println!("!!! Starting Golem Sharding Tester"); - fn worker(context: &mut Context) { - let mut commands = vec![ - Command::StartShard, - Command::StopShard, - Command::RestartShardManager, - ]; - let mut rng = rand::thread_rng(); - commands.shuffle(&mut rng); - match commands[0] { - Command::StartShard => { - println!("!!! Golem Sharding Tester starting shard"); - start_shard(context); - println!("!!! Golem Sharding Tester started shard"); + loop { + match command_rx.recv().unwrap() { + EnvCommand::StartShards(n) => { + println!("!!! Golem Sharding Tester starting shards({n})"); + start_shards(&mut context, n); + println!("!!! Golem Sharding Tester started shards({n})"); + } + EnvCommand::StopShards(n) => { + println!("!!! Golem Sharding Tester stopping shards{n}"); + stop_shards(&mut context, n); + println!("!!! Golem Sharding Tester stopped shard{n}"); } - Command::StopShard => { - println!("!!! Golem Sharding Tester stopping shard"); - stop_shard(context); - println!("!!! Golem Sharding Tester stopped shard"); + EnvCommand::StopAllShards => { + println!("!!! Golem Sharding Tester stopping all shards"); + stop_all_shards(&mut context); + println!("!!! Golem Sharding Tester stopped all shard"); } - Command::RestartShardManager => { + EnvCommand::RestartShardManager => { println!("!!! Golem Sharding Tester reloading shard manager"); - reload_shard_manager(context); + reload_shard_manager(&mut context); println!("!!! Golem Sharding Tester reloaded shard manager"); } + EnvCommand::Stop => break, } } - - while stop_rx.try_recv().is_err() { - let mut rng = rand::thread_rng(); - let n = rng.gen_range(1..10); - std::thread::sleep(Duration::from_secs(n)); - worker(&mut context); - } } fn upload_and_start_worker( @@ -298,7 +365,12 @@ fn get_invocation_key_invoke_and_await_with_retry( } fn service_is_responsive_to_shard_changes( - (context, cli): (Arc, CliLive), + (context, scenario, env_command_tx, cli): ( + Arc, + Scenario, + Sender, + CliLive, + ), ) -> Result<(), Failed> { let template_name = "echo-service-1".to_string(); @@ -319,7 +391,7 @@ fn service_is_responsive_to_shard_changes( let mut workers_with_names = Vec::new(); - for n in 1..=4 { + for n in 1..=scenario.workers_count { println!("*** WORKER {n} STARTING ***"); let worker_name = format!("echo-service-{n}"); let worker_id = upload_and_start_worker(&template, &worker_name, &cli)?; @@ -329,44 +401,99 @@ fn service_is_responsive_to_shard_changes( println!("*** ALL WORKERS STARTED ***"); - fn invoke_and_await_workers( - workers: &[(VersionedWorkerId, String)], - cli: &CliLive, - ) -> Result<(), Failed> { - let mut tasks = Vec::new(); - - for (worker, name) in workers { - let name = name.clone(); - let template_id = worker.worker_id.template_id.to_string(); - let cli = cli.clone(); - tasks.push(std::thread::spawn(move || { - get_invocation_key_invoke_and_await_with_retry( - &template_id, - &name, - "golem:it/api/echo", - r#"["Hello"]"#, - &cli, - ) - })); + let (worker_command_tx, worker_command_rx) = std::sync::mpsc::channel(); + let (worker_event_tx, worker_event_rx) = std::sync::mpsc::channel(); + + let workers_handler = std::thread::spawn(move || { + let workers_with_names = workers_with_names; + let cli = cli; + + loop { + match worker_command_rx.recv().unwrap() { + WorkerCommand::InvokeAndAwaitWorkers(name) => { + println!("*** INVOKING WORKERS: {name} ***"); + invoke_and_await_workers(&workers_with_names, &cli).unwrap(); + println!("*** INVOKING WORKERS {name} DONE ***"); + worker_event_tx + .send(WorkerEvent::InvokeAndAwaitWorkersCompleted(name)) + .unwrap(); + } + WorkerCommand::Stop => break, + } } + }); - for task in tasks { - let _ = task.join().unwrap()?; + for step in scenario.steps { + match step { + Step::StartShards(n) => { + env_command_tx.send(EnvCommand::StartShards(n)).unwrap(); + } + Step::StopShards(n) => { + env_command_tx.send(EnvCommand::StopShards(n)).unwrap(); + } + Step::StopAllShards => { + env_command_tx.send(EnvCommand::StopAllShards).unwrap(); + } + Step::RestartShardManager => { + env_command_tx + .send(EnvCommand::RestartShardManager) + .unwrap(); + } + Step::Sleep(duration) => { + std::thread::sleep(duration); + } + Step::InvokeAndAwaitWorkersAsync(name) => { + worker_command_tx + .send(WorkerCommand::InvokeAndAwaitWorkers(name)) + .unwrap(); + } + Step::WaitForInvokeAndAwaitResult => { + let _ = worker_event_rx.recv().unwrap(); + } } - - Ok(()) } - for c in 0..2 { - if c != 0 { - std::thread::sleep(Duration::from_secs(10)); - } - println!("*** INVOKING WORKERS {c} ***"); - invoke_and_await_workers(&workers_with_names, &cli)?; - println!("*** INVOKING WORKERS {c} DONE ***"); + worker_command_tx.send(WorkerCommand::Stop).unwrap(); + env_command_tx.send(EnvCommand::Stop).unwrap(); + + workers_handler.join().unwrap(); + + Ok(()) +} + +enum WorkerCommand { + InvokeAndAwaitWorkers(String), + Stop, +} + +enum WorkerEvent { + InvokeAndAwaitWorkersCompleted(String), +} + +fn invoke_and_await_workers( + workers: &[(VersionedWorkerId, String)], + cli: &CliLive, +) -> Result<(), Failed> { + let mut tasks = Vec::new(); + + for (worker, name) in workers { + let name = name.clone(); + let template_id = worker.worker_id.template_id.to_string(); + let cli = cli.clone(); + tasks.push(std::thread::spawn(move || { + get_invocation_key_invoke_and_await_with_retry( + &template_id, + &name, + "golem:it/api/echo", + r#"["Hello"]"#, + &cli, + ) + })); } - println!("*** TEST COMPLETED ***"); + for task in tasks { + let _ = task.join().unwrap()?; + } Ok(()) }