From c110810d81ef3a68b4fea23d6b01925166db905c Mon Sep 17 00:00:00 2001 From: guza Date: Mon, 9 Sep 2024 08:01:45 +0800 Subject: [PATCH] use async io --- Cargo.lock | 229 +++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/cmd.rs | 22 ++-- src/input_processor.rs | 186 +++++++++++++++++++-------------- src/lib.rs | 2 +- src/main.rs | 7 +- 6 files changed, 355 insertions(+), 92 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 29a053e..1e023a6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "anstream" version = "0.6.15" @@ -51,6 +66,45 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bytes" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" version = "4.5.17" @@ -97,18 +151,111 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "gimli" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "libc" +version = "0.2.158" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +dependencies = [ + "hermit-abi", + "libc", + "wasi", + "windows-sys", +] + +[[package]] +name = "object" +version = "0.36.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" +dependencies = [ + "memchr", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -127,6 +274,52 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] 
+name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "strsim" version = "0.11.1" @@ -144,6 +337,35 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tokio" +version = "1.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys", +] + +[[package]] +name = "tokio-macros" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -156,11 +378,18 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "wc" version = "0.1.0" dependencies = [ "clap", + "tokio", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 56b91e3..e447626 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] clap = { version = "4.3", features = ["derive"] } +tokio = { version = "1.38.0", features = ["full"] } \ No newline at end of file diff --git a/src/cmd.rs b/src/cmd.rs index 539b65f..0728899 100644 --- a/src/cmd.rs +++ b/src/cmd.rs @@ -1,6 +1,6 @@ -use std::fs::File; -use std::io::{self, BufReader}; -use clap::{Parser, CommandFactory}; // Add CommandFactory here +use clap::{CommandFactory, Parser}; +use tokio::fs::File; +use tokio::io::{self, BufReader}; use crate::input_processor::{process_input, CountOptions}; @@ -59,23 +59,23 @@ impl Cli { } } -pub fn run() -> io::Result<()> { +pub async fn run() -> io::Result<()> { let (cli, options) = Cli::parse_args(); - let mut stdout = io::stdout(); + let mut stdout = tokio::io::stdout(); if cli.files.is_empty() { - let stdin = io::stdin(); - let mut handle = stdin.lock(); - if let Err(err) = process_input(&mut handle, &mut stdout, &options) { + let stdin = tokio::io::stdin(); + let mut reader = BufReader::new(stdin); + if let Err(err) = process_input(&mut reader, &mut stdout, &options).await { eprintln!("Error processing stdin: {}", err); } } else { for filename in cli.files { - match File::open(&filename) { + match File::open(&filename).await { Ok(file) => { let mut reader = BufReader::new(file); - if let Err(err) = process_input(&mut reader, &mut stdout, &options) { + if let Err(err) = process_input(&mut reader, &mut stdout, &options).await { eprintln!("Error processing file '{}': {}", filename, err); } } @@ -87,4 +87,4 @@ pub fn run() -> io::Result<()> { } Ok(()) -} \ No newline at end of file +} diff --git a/src/input_processor.rs b/src/input_processor.rs index 3a909c9..8a858d4 100644 --- a/src/input_processor.rs 
+++ b/src/input_processor.rs @@ -1,4 +1,4 @@ -use std::io::{self, Read, Write}; +use tokio::io::{self, AsyncBufReadExt, AsyncReadExt, AsyncWriteExt}; pub struct CountOptions { pub show_lines: bool, @@ -6,31 +6,25 @@ pub struct CountOptions { pub show_bytes: bool, } -pub fn process_input<R: Read, W: Write>( +pub async fn process_input<R, W>( reader: &mut R, writer: &mut W, options: &CountOptions, -) -> io::Result<()> { +) -> io::Result<()> +where + R: AsyncBufReadExt + AsyncReadExt + Unpin, + W: AsyncWriteExt + Unpin, +{ let mut line_count = 0; let mut word_count = 0; + let mut char_count = 0; - let mut buffer = Vec::new(); - reader.read_to_end(&mut buffer)?; - let char_count = String::from_utf8_lossy(&buffer).chars().count(); - - let lines = buffer.split(|&b| b == b'\n'); - let mut lines_iter = lines.peekable(); - - while let Some(line) = lines_iter.next() { - if lines_iter.peek().is_none() && line.is_empty() { - break; - } - + let mut buffer = String::new(); + while reader.read_line(&mut buffer).await? > 0 { line_count += 1; - word_count += line - .split(|&b| b.is_ascii_whitespace()) - .filter(|&w| !w.is_empty()) - .count(); + word_count += buffer.split_whitespace().count(); + char_count += buffer.chars().count(); + buffer.clear(); } let mut output = String::new(); @@ -44,208 +38,246 @@ pub fn process_input<R: Read, W: Write>( output.push_str(&format!("{:8}", char_count)); } - writeln!(writer, "{}", output)?; + writer.write_all(output.as_bytes()).await?; + writer.write_all(b"\n").await?; Ok(()) } +// Update tests to use tokio's runtime #[cfg(test)] mod tests { use super::*; - use std::io::Cursor; + use tokio::io::BufReader; - #[test] - fn test_empty_input() { - let mut input = Cursor::new(b""); + #[tokio::test] + async fn test_empty_input() { + let input = b""; + let mut reader = BufReader::new(&input[..]); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); - assert_eq!( -
String::from_utf8(output).unwrap(), - " 0 0 0\n" - ); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); + assert_eq!(output, b" 0 0 0\n"); } - #[test] - fn test_single_word() { - let mut input = Cursor::new(b"hello"); + #[tokio::test] + async fn test_single_word() { + let input = b"hello"; + let mut reader = BufReader::new(&input[..]); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 1 5\n" ); } - #[test] - fn test_multiple_words() { - let mut input = Cursor::new(b"hello world\nrust is great"); + #[tokio::test] + async fn test_multiple_words() { + let input = b"hello world\nrust is great"; + let mut reader = BufReader::new(&input[..]); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 2 5 25\n" ); } - #[test] - fn test_show_lines_only() { - let mut input = Cursor::new(b"hello\nworld\n"); + #[tokio::test] + async fn test_show_lines_only() { + let input = b"hello\nworld\n"; + let mut reader = BufReader::new(&input[..]); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: false, show_bytes: false, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!(String::from_utf8(output).unwrap(), " 2\n"); } - #[test] - fn test_show_words_only() { - let mut input = Cursor::new(b"hello world rust"); + #[tokio::test] + async fn test_show_words_only() { + let input = b"hello world rust"; + let mut reader = BufReader::new(&input[..]); let 
mut output = Vec::new(); let options = CountOptions { show_lines: false, show_words: true, show_bytes: false, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!(String::from_utf8(output).unwrap(), " 3\n"); } - #[test] - fn test_show_bytes_only() { - let mut input = Cursor::new(b"hello\n"); + #[tokio::test] + async fn test_show_bytes_only() { + let input = b"hello\n"; + let mut reader = BufReader::new(&input[..]); let mut output = Vec::new(); let options = CountOptions { show_lines: false, show_words: false, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!(String::from_utf8(output).unwrap(), " 6\n"); } - #[test] - fn test_utf8_characters() { - let mut input = Cursor::new("Hello, 世界!\n"); + #[tokio::test] + async fn test_utf8_characters() { + let input = "Hello, 世界!\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 2 11\n" ); } - #[test] - fn test_multi_byte_characters() { - let mut input = Cursor::new("🚀 Rust 💻\n"); + #[tokio::test] + async fn test_multi_byte_characters() { + let input = "🚀 Rust 💻\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 3 9\n" ); } - #[test] - fn test_korean_characters() { - let mut input =
Cursor::new("안녕하세요 세계!\n"); + #[tokio::test] + async fn test_korean_characters() { + let input = "안녕하세요 세계!\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 2 10\n" ); } - #[test] - fn test_japanese_characters() { - let mut input = Cursor::new("こんにちは 世界!\n"); + #[tokio::test] + async fn test_japanese_characters() { + let input = "こんにちは 世界!\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 2 10\n" ); } - #[test] - fn test_mixed_languages() { - let mut input = Cursor::new("Hello 안녕 こんにちは World!\n"); + #[tokio::test] + async fn test_mixed_languages() { + let input = "Hello 안녕 こんにちは World!\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 1 4 22\n" ); } - #[test] - fn test_korean_multiline() { - let mut input = Cursor::new("안녕하세요\n세계입니다\n"); + #[tokio::test] + async fn test_korean_multiline() { + let input = "안녕하세요\n세계입니다\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes:
true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 2 2 12\n" ); } - #[test] - fn test_japanese_multiline() { - let mut input = Cursor::new("こんにちは\n世界です\n"); + #[tokio::test] + async fn test_japanese_multiline() { + let input = "こんにちは\n世界です\n"; + let mut reader = BufReader::new(input.as_bytes()); let mut output = Vec::new(); let options = CountOptions { show_lines: true, show_words: true, show_bytes: true, }; - let _ = process_input(&mut input, &mut output, &options); + process_input(&mut reader, &mut output, &options) + .await + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), " 2 2 11\n" diff --git a/src/lib.rs b/src/lib.rs index 9299c2b..088922e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,2 @@ +pub mod cmd; pub mod input_processor; -pub mod cmd; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index fcb3390..7b9133d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,11 @@ use std::process; -mod input_processor; mod cmd; +mod input_processor; -fn main() { - if let Err(err) = cmd::run() { +#[tokio::main] +async fn main() { + if let Err(err) = cmd::run().await { eprintln!("Error: {}", err); process::exit(1); }