diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index a8c9de0d9..8e636fca1 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -127,13 +127,10 @@ jobs: with: fetch-depth: 0 - - name: (Windows) Install asiosdk + - name: (Windows) Install llvm if: ${{ matrix.arch == 'x86_64-pc-windows-msvc' }} shell: PowerShell run: | - curl https://www.steinberg.net/asiosdk -o asiosdk.zip - Expand-Archive .\asiosdk.zip -DestinationPath .\ - choco install asio4all choco install llvm # This is a workaround for NSIS bundle size limits https://nsis.sourceforge.io/Talk:Special_Builds @@ -254,7 +251,6 @@ jobs: VITE_POSTHOG_API_KEY: ${{ secrets.VITE_POSTHOG_API_KEY }} SECRET_DESKTOP_INSTALLATION_PROOF_KEY: ${{secrets.SECRET_DESKTOP_INSTALLATION_PROOF_KEY}} # Windows Only - CPAL_ASIO_DIR: ${{ github.workspace }}/asiosdk_2.3.3_2019-06-14 LIBCLANG_PATH: C:\Program Files\LLVM\bin NODE_OPTIONS: "--max_old_space_size=8192" - name: Prepare binary files diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index 87e7c622d..e32ec0571 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -125,13 +125,10 @@ jobs: with: fetch-depth: 0 - - name: (Windows) Install asiosdk} + - name: (Windows) Install llvm if: ${{ matrix.arch == 'x86_64-pc-windows-msvc' }} shell: PowerShell run: | - curl https://www.steinberg.net/asiosdk -o asiosdk.zip - Expand-Archive .\asiosdk.zip -DestinationPath .\ - choco install asio4all choco install llvm # This is a workaround for NSIS bundle size limits https://nsis.sourceforge.io/Talk:Special_Builds @@ -248,7 +245,6 @@ jobs: VITE_POSTHOG_API_KEY: ${{ secrets.VITE_POSTHOG_API_KEY }} SECRET_DESKTOP_INSTALLATION_PROOF_KEY: ${{ secrets.SECRET_DESKTOP_INSTALLATION_PROOF_KEY }} # Windows Only - CPAL_ASIO_DIR: ${{ github.workspace }}/asiosdk_2.3.3_2019-06-14 LIBCLANG_PATH: C:\Program Files\LLVM\bin NODE_OPTIONS: "--max_old_space_size=8192" - name: Prepare binary files diff --git a/apps/shinkai-desktop/src-tauri/Cargo.lock b/apps/shinkai-desktop/src-tauri/Cargo.lock index 5acea2596..f344414b4 100644 --- a/apps/shinkai-desktop/src-tauri/Cargo.lock +++ b/apps/shinkai-desktop/src-tauri/Cargo.lock @@ -70,27 +70,6 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" -[[package]] -name = "alsa" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37fe60779335388a88c01ac6c3be40304d1e349de3ada3b15f7808bb90fa9dce" -dependencies = [ - "alsa-sys", - "bitflags 2.5.0", - "libc", -] - -[[package]] -name = "alsa-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527" -dependencies = [ - "libc", - "pkg-config", -] - [[package]] name = "android-tzdata" version = "0.1.1" @@ -180,19 +159,6 @@ dependencies = [ "zbus", ] -[[package]] -name = "asio-sys" -version = "0.2.2" -source = "git+https://github.com/RustAudio/cpal.git#2ec761d30b35dbbacfecb41a2f5985781a7d52d1" -dependencies = [ - "bindgen 0.69.4", - "cc", - "num-derive", - "num-traits", - "parse_cfg", - "walkdir", -] - [[package]] name = "async-broadcast" version = "0.7.1" @@ -427,29 +393,6 @@ dependencies = [ "which", ] -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags 2.5.0", - "cexpr", - "clang-sys", - "itertools", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.55", - "which", -] - [[package]] name = "bit-set" version = "0.5.3" @@ -725,10 +668,6 @@ name = "cc" version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" -dependencies = [ - "jobserver", - "libc", -] [[package]] name = "cesu8" @@ -992,50 +931,6 @@ dependencies = [ "libc", ] -[[package]] -name = "coreaudio-rs" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace" -dependencies = [ - "bitflags 1.3.2", - "core-foundation-sys", - "coreaudio-sys", -] - -[[package]] -name = "coreaudio-sys" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f01585027057ff5f0a5bf276174ae4c1594a2c5bde93d5f46a016d76270f5a9" -dependencies = [ - "bindgen 0.69.4", -] - -[[package]] -name = "cpal" -version = "0.15.3" -source = "git+https://github.com/RustAudio/cpal.git#2ec761d30b35dbbacfecb41a2f5985781a7d52d1" -dependencies = [ - "alsa", - "asio-sys", - "core-foundation-sys", - "coreaudio-rs", - "dasp_sample", - "jni", - "js-sys", - "libc", - "mach2", - "ndk 0.8.0", - "ndk-context", - "num-traits", - "oboe", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "windows 0.54.0", -] - [[package]] name = "cpufeatures" version = "0.2.12" @@ -1209,12 +1104,6 @@ dependencies = [ "syn 2.0.55", ] -[[package]] -name = "dasp_sample" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f" - [[package]] name = "der" version = "0.7.9" @@ -2585,15 +2474,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "0.4.8" @@ -2651,15 +2531,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" -[[package]] -name = "jobserver" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.69" @@ -2872,15 +2743,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" -[[package]] -name = "mach2" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" -dependencies = [ - "libc", -] - [[package]] name = "malloc_buf" version = "0.0.6" @@ -3038,20 +2900,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "ndk" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2076a31b7010b17a38c01907c45b945e8f11495ee4dd588309718901b1f7a5b7" -dependencies = [ - "bitflags 2.5.0", - "jni-sys", - "log", - "ndk-sys 0.5.0+25.2.9519653", - "num_enum", - "thiserror", -] - [[package]] name = "ndk" version = "0.9.0" @@ -3141,17 +2989,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" -[[package]] -name = "num-derive" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.55", -] - [[package]] name = "num-traits" version = "0.2.18" @@ -3438,29 +3275,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "oboe" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8b61bebd49e5d43f5f8cc7ee2891c16e0f41ec7954d36bcb6c14c5e0de867fb" -dependencies = [ - "jni", - "ndk 0.8.0", - "ndk-context", - "num-derive", - "num-traits", - "oboe-sys", -] - -[[package]] -name = "oboe-sys" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8bb09a4a2b1d668170cfe0a7d5bc103f8999fb316c98099b6a9939c9f2e79d" -dependencies = [ - "cc", -] - [[package]] name = "once_cell" version = "1.19.0" @@ -3613,15 +3427,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "parse_cfg" -version = "4.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "905787a434a2c721408e7c9a252e85f3d93ca0f118a5283022636c0e05a7ea49" -dependencies = [ - "nom", -] - [[package]] name = "paste" version = "1.0.15" @@ -3911,16 +3716,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" -[[package]] -name = "prettyplease" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" -dependencies = [ - "proc-macro2", - "syn 2.0.55", -] - [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -4848,7 +4643,6 @@ dependencies = [ "base64 0.22.1", "blake3", "chrono", - "cpal", "ed25519-dalek", "futures-util", "hex", @@ -5221,7 +5015,7 @@ dependencies = [ "lazy_static", "libc", "log", - "ndk 0.9.0", + "ndk", "ndk-context", "ndk-sys 0.6.0+11769913", "objc", @@ -6614,7 +6408,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "094a5bd86f6f52562bbc74c28f27cd80197e54656cfb7213cf4ba37b5246cc9e" dependencies = [ - "bindgen 0.64.0", + "bindgen", "cfg-if", ] @@ -6679,16 +6473,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows" -version = "0.54.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" -dependencies = [ - "windows-core 0.54.0", - "windows-targets 0.52.6", -] - [[package]] name = "windows" version = "0.58.0" @@ -6708,16 +6492,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-core" -version = "0.54.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" -dependencies = [ - "windows-result 0.1.2", - "windows-targets 0.52.6", -] - [[package]] name = "windows-core" version = "0.58.0" @@ -6726,7 +6500,7 @@ checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" dependencies = [ "windows-implement", "windows-interface", - "windows-result 0.2.0", + "windows-result", "windows-strings", "windows-targets 0.52.6", ] @@ -6759,20 +6533,11 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-result 0.2.0", + "windows-result", "windows-strings", "windows-targets 0.52.6", ] -[[package]] -name = "windows-result" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-result" version = "0.2.0" @@ -6788,7 +6553,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" dependencies = [ - "windows-result 0.2.0", + "windows-result", "windows-targets 0.52.6", ] @@ -7072,7 +6837,7 @@ dependencies = [ "jni", "kuchikiki", "libc", - "ndk 0.9.0", + "ndk", "objc2", "objc2-app-kit", "objc2-foundation", diff --git a/apps/shinkai-desktop/src-tauri/Cargo.toml b/apps/shinkai-desktop/src-tauri/Cargo.toml index 4f1197a7a..a7329d0d8 100644 --- a/apps/shinkai-desktop/src-tauri/Cargo.toml +++ b/apps/shinkai-desktop/src-tauri/Cargo.toml @@ -17,7 +17,6 @@ tauri = { version = "2.0.2", features = [ "macos-private-api", "tray-icon", "ima serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" whisper-rs = "0.8.0" -cpal = { git = "https://github.com/RustAudio/cpal.git", features = ["asio"] } webrtc-vad = "0.2" hound = "3.4.0" # fix this dependency later on diff --git a/apps/shinkai-desktop/src-tauri/src/audio/mod.rs b/apps/shinkai-desktop/src-tauri/src/audio/mod.rs deleted file mode 100644 index e23a60e0c..000000000 --- a/apps/shinkai-desktop/src-tauri/src/audio/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod transcribe; diff --git a/apps/shinkai-desktop/src-tauri/src/audio/transcribe.rs b/apps/shinkai-desktop/src-tauri/src/audio/transcribe.rs deleted file mode 100644 index fd240f3cd..000000000 --- a/apps/shinkai-desktop/src-tauri/src/audio/transcribe.rs +++ /dev/null @@ -1,151 +0,0 @@ -use cpal::traits::{DeviceTrait, StreamTrait}; -use std::convert::TryInto; -use std::sync::{Arc, Mutex}; -use std::time::{Duration, Instant}; -use webrtc_vad::Vad; -use whisper_rs::{FullParams, SamplingStrategy, WhisperContext}; - -pub fn run( - device: &cpal::Device, - config: cpal::StreamConfig, - err_fn: fn(cpal::StreamError), - is_activated: Arc>, - ctx: Arc>, -) where - T: cpal::Sample + cpal::SizedSample + Into, -{ - // Testing code to check that the audio capture is working - let spec = hound::WavSpec { - channels: 1, - sample_rate: config.sample_rate.0, - bits_per_sample: 32, - sample_format: hound::SampleFormat::Float, - }; - let mut writer = hound::WavWriter::create("../../output.wav", spec).unwrap(); - let config_clone = config.clone(); - - // Create a buffer to accumulate audio data - let buffer = Arc::new(Mutex::new(Vec::new())); - - // Initialize the VAD - let vad = Arc::new(Mutex::new( - Vad::new(config.sample_rate.0.try_into().unwrap()).unwrap(), - )); - vad.lock() - .unwrap() - .fvad_set_mode(webrtc_vad::VadMode::VeryAggressive) - .unwrap(); - - println!("Selected input device: {}", device.name().unwrap()); - let mut start_time = None; - let mut last_voice_activity = None; - - // Normal Code - let stream = device - .build_input_stream( - &config_clone, - move |data: &[T], _: &cpal::InputCallbackInfo| { - // Convert the incoming audio data to f32 and add it to the buffer - let mut buffer = buffer.lock().unwrap(); - buffer.extend(data.iter().map(|sample| (*sample).into())); - - let mut is_activated = is_activated.lock().unwrap(); - let mut vad = Vad::new(config.sample_rate.0.try_into().unwrap()).unwrap(); - vad.fvad_set_mode(webrtc_vad::VadMode::VeryAggressive) - .unwrap(); - - // print buffer length - // println!("Buffer length: {}", buffer.len()); - if !buffer.is_empty() && buffer.len() >= 480 { - // println!("Buffer length: {}", buffer.len()); - let buffer_i16: Vec = buffer - .iter() - .map(|&f| (f * i16::MAX as f32) as i16) - .collect(); - let is_voice = vad - .is_voice_segment(&buffer_i16[(buffer.len() - 480)..]) - .unwrap(); - // println!("Is voice: {}", is_voice); - - if is_voice { - last_voice_activity = Some(Instant::now()); - if !*is_activated { - // Start recording - *is_activated = true; - start_time = Some(Instant::now()); - println!("Started recording"); - } - } else if let Some(last_voice_activity) = last_voice_activity { - if last_voice_activity.elapsed() > Duration::from_secs_f32(1.0) - && *is_activated - { - // Stop recording - *is_activated = false; - if let Some(start_time) = start_time { - let duration = start_time.elapsed(); - println!("Stopped recording after {} seconds", duration.as_secs()); - - // Process the audio data here - let buffer_clone = buffer.clone(); - for sample in buffer_clone { - let sample_f32: f32 = sample.into(); - writer.write_sample(sample_f32).unwrap(); - } - let ctx = ctx.lock().unwrap(); - let mut state = ctx.create_state().expect("failed to create state"); - - let mut params = - FullParams::new(SamplingStrategy::Greedy { best_of: 1 }); - params.set_n_threads(2); - params.set_print_special(false); - params.set_print_progress(false); - params.set_print_realtime(false); - params.set_print_timestamps(false); - - let audio_data = - whisper_rs::convert_stereo_to_mono_audio(&buffer).unwrap(); - - state - .full(params, &audio_data) - .expect("failed to run model"); - - let num_segments = state - .full_n_segments() - .expect("failed to get number of segments"); - for i in 0..num_segments { - let segment = state - .full_get_segment_text(i) - .expect("failed to get segment"); - println!("Segment {}: {}", i, segment); - - let start_timestamp = state - .full_get_segment_t0(i) - .expect("failed to get segment start timestamp"); - let end_timestamp = state - .full_get_segment_t1(i) - .expect("failed to get segment end timestamp"); - println!( - "[{} - {}]: {}", - start_timestamp, end_timestamp, segment - ); - } - - // Clear the buffer - buffer.clear(); - - // Reset the is_activated flag - *is_activated = false; - } - } - } - } - }, - err_fn, - None, - ) - .unwrap(); - stream.play().unwrap(); - loop { - std::thread::sleep(std::time::Duration::from_millis(100)); - } -} diff --git a/apps/shinkai-desktop/src-tauri/src/main.rs b/apps/shinkai-desktop/src-tauri/src/main.rs index 923efb09e..c25cf1702 100644 --- a/apps/shinkai-desktop/src-tauri/src/main.rs +++ b/apps/shinkai-desktop/src-tauri/src/main.rs @@ -23,7 +23,6 @@ use tokio::sync::Mutex; use tray::create_tray; use windows::{recreate_window, Window}; -mod audio; mod commands; mod galxe; mod global_shortcuts;