From 1fd02e8ff71a8fc79ce499e6b2ae632929d5e167 Mon Sep 17 00:00:00 2001 From: Tobias de Bruijn Date: Mon, 19 Feb 2024 21:30:30 +0100 Subject: [PATCH] Implement Ctrl+\ listener, add --start-at-dir, fix broken image `continue` if image uploading failed. Signed-off-by: Tobias de Bruijn --- colorizer/Cargo.lock | 85 ++++++++++++++++++++++++++++++++++- colorizer/Cargo.toml | 3 +- colorizer/README.md | 5 +++ colorizer/src/args.rs | 4 ++ colorizer/src/main.rs | 101 ++++++++++++++++++++++++++++++++++++++---- colorizer/src/pxl.rs | 11 +++++ 6 files changed, 198 insertions(+), 11 deletions(-) diff --git a/colorizer/Cargo.lock b/colorizer/Cargo.lock index ac42674..fc95c92 100644 --- a/colorizer/Cargo.lock +++ b/colorizer/Cargo.lock @@ -273,6 +273,7 @@ dependencies = [ "reqwest-protobuf", "serde", "serde_json", + "term", "thiserror", "tokio", "tracing", @@ -296,6 +297,27 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "either" version = "1.9.0" @@ -427,6 +449,17 @@ dependencies = [ "slab", ] +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.28.0" @@ -658,6 +691,17 @@ version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.0", + "libc", + "redox_syscall 0.4.1", +] + [[package]] name = "linux-raw-sys" version = "0.4.8" @@ -801,7 +845,7 @@ checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.3.5", "smallvec", "windows-targets", ] @@ -935,6 +979,26 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_users" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.9.6" @@ -1096,6 +1160,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "ryu" version = "1.0.15" @@ -1277,11 +1347,22 @@ checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix", "windows-sys", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "thiserror" version = "1.0.49" diff --git a/colorizer/Cargo.toml b/colorizer/Cargo.toml index dde208e..5277cd4 100644 --- a/colorizer/Cargo.toml +++ b/colorizer/Cargo.toml @@ -18,4 +18,5 @@ serde_json = "1.0.104" uuid = "1.4.1" lazy_static = "1.4.0" regex = "1.9.6" -chrono = "0.4.31" \ No newline at end of file +chrono = "0.4.31" +term = "0.7.0" \ No newline at end of file diff --git a/colorizer/README.md b/colorizer/README.md index 68e12e8..c82c34b 100644 --- a/colorizer/README.md +++ b/colorizer/README.md @@ -27,6 +27,7 @@ The following environmental variables must be set: --s3-region \ --s3-endpoint-url \ --s3-force-path-style + --start-at-dir ``` This will run Colorizer, it'll convert albums one at a time. @@ -35,5 +36,9 @@ This might take a while, depending on the size of your Pxl instance. >Warning: If for whatever reason Colorizer fails halfway through a migration, it is recommended to revert back to the backup. >Colorizer has **NO** recovery mechanism whatsoever. +## Quitting halfway through +If you want to quit halfway through, use `Ctrl + \`, this will finish the current album and then exit. +You can then later continue with the ``--start-at-dir`` flag. + ## License MIT or Apache 2.0, at your option. \ No newline at end of file diff --git a/colorizer/src/args.rs b/colorizer/src/args.rs index 7501fd8..313cd35 100644 --- a/colorizer/src/args.rs +++ b/colorizer/src/args.rs @@ -15,6 +15,10 @@ pub struct Args { #[clap(env)] pub chroma_service_token: String, + /// Name of the Pxl album directory to start at, all albums before (chronologically) are ignored. + #[clap(long)] + pub start_at_dir: Option, + /// Pxl `state.json` file. #[clap(long)] pub metadata_file: PathBuf, diff --git a/colorizer/src/main.rs b/colorizer/src/main.rs index b6287b0..7bc85cb 100644 --- a/colorizer/src/main.rs +++ b/colorizer/src/main.rs @@ -5,7 +5,10 @@ use crate::pxl_metadata::MetadataFile; use clap::Parser; use color_eyre::eyre::Error; use std::path::Path; -use tracing::info; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tracing::{info, warn}; use tracing_subscriber::fmt::layer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -33,27 +36,109 @@ async fn main() -> color_eyre::Result<()> { let file_tree = PxlFileTree::new(args.pxl_dir_base); let albums = file_tree.get_albums()?; + // Sort them chronologically let albums = sort_by_created(&args.metadata_file, albums).await?; + let albums = if let Some(start_at_dir) = &args.start_at_dir { + let albums = albums + .into_iter() + .skip_while(|a| a.get_dir_name().ne(start_at_dir)) + .collect::>(); + + info!( + "--start-at-dir provided, starting at directory {start_at_dir}. This leaves {} albums", + albums.len() + ); + + albums + } else { + albums + }; + info!("Processing {} albums.", albums.len()); + // Quit after this album on SIGQUIT, this is Ctrl + \ on Linux. + let quit = Arc::new(AtomicBool::new(false)); + tokio::task::spawn({ + let quit = Arc::clone(&quit); + + async move { + let mut stream = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::quit()) + .expect("Failed to register signal"); + loop { + stream.recv().await; + quit.store(true, Ordering::Relaxed); + } + } + }); + // We could parallelize this, but to keep the server load in check and avoid HTTP 429's, we don't. - for album in albums { + for (album_idx, album) in albums.iter().enumerate() { + if quit.load(Ordering::Relaxed) { + info!("Ctrl+\\ was pressed, quitting. The name of this album is {}, for resuming next time.", album.name); + return Ok(()); + } + let images = album.get_photos()?; - info!("Processing album {}. {} Images.", album.name, images.len()); - let album_id = chroma.create_album(album.name).await?; + info!( + "Processing album {}. {} Images. (Album {}/{})", + album.name, + images.len(), + album_idx + 1, + albums.len() + ); + let album_id = chroma.create_album(album.name.clone()).await?; let mut first_photo = None; - for photo in images { + let mut sigquit_message_printed = false; + 'photo_loop: for (image_idx, photo) in images.iter().enumerate() { + if quit.load(Ordering::Relaxed) && !sigquit_message_printed { + warn!("Ctrl+\\ was pressed, quitting after this album"); + sigquit_message_printed = true; + } + let components = photo.s3_url.split("_").collect::>(); let first = components.first().ok_or(Error::msg("Invalid src"))?; let s3_url = format!("{first}_o.jpg"); - info!("Uploading {}", s3_url); - let photo_bytes = reqwest::get(s3_url).await?.bytes().await?.to_vec(); - let image_id = chroma.create_photo(&album_id, photo_bytes).await?; + let photo_bytes = loop { + match reqwest::get(s3_url.clone()).await { + Ok(v) => break v.bytes().await?.to_vec(), + Err(e) => { + warn!("Failed to download image from Pxl S3 bucket: {e}"); + tokio::time::sleep(Duration::from_secs(5)).await; + } + }; + }; + + info!( + "Uploading {} (image {}/{})", + s3_url, + image_idx + 1, + images.len() + ); + let mut retry_counter = 0; + let image_id = loop { + match chroma.create_photo(&album_id, photo_bytes.clone()).await { + Ok(v) => break v, + Err(e) => { + if retry_counter >= 3 { + warn!( + "Skipping photo '{}' belonging to Chroma album '{}' named '{}'", + photo.s3_url, album_id, album.name, + ); + + continue 'photo_loop; + } + + warn!("Failed to upload photo to Chroma: {e}. Trying again."); + tokio::time::sleep(Duration::from_secs(5)).await; + retry_counter += 1; + } + } + }; info!("Created Chroma photo {image_id}"); diff --git a/colorizer/src/pxl.rs b/colorizer/src/pxl.rs index cf31966..d00ae37 100644 --- a/colorizer/src/pxl.rs +++ b/colorizer/src/pxl.rs @@ -69,6 +69,17 @@ impl PxlFileTree { } impl PxlAlbum { + pub fn get_dir_name(&self) -> String { + self.dir + .components() + .last() + .expect("There is no directory name") + .as_os_str() + .to_str() + .expect("Directory name is not UTF-8") + .to_string() + } + pub fn get_photos(&self) -> Result> { let rd = fs::read_dir(&self.dir)?; let photo_src_regex = Regex::new(r#"