diff --git a/Cargo.toml b/Cargo.toml
index 99ee863..9598b3d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,6 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-opencv = "0.92.2"
 tch = "0.17.0"
 
 [[example]]
diff --git a/code b/code
index ccd9c57..036ff09 100755
--- a/code
+++ b/code
@@ -4,4 +4,4 @@ export LIBTORCH_LIB=$(pwd)/libtorch/
 
 export LD_LIBRARY_PATH="$LIBTORCH/lib/:$LD_LIBRARY_PATH"
 
-code
+code .
diff --git a/examples/predict/main.rs b/examples/predict/main.rs
index d4b4ede..c033a2f 100644
--- a/examples/predict/main.rs
+++ b/examples/predict/main.rs
@@ -1,16 +1,16 @@
 use tch::TchError;
 use yolo_v8::{Image, YoloV8Classifier, YoloV8ObjectDetection, YoloV8Segmentation};
 
-fn object_detection() {
+fn object_detection(path: &str) {
     // Load image to perform object detection, note that YOLOv8 resolution must match
     // scaling width and height here
-    let mut image = Image::new("images/bus.jpg", YoloV8ObjectDetection::input_dimension());
+    let mut image = Image::new(path, YoloV8ObjectDetection::input_dimension());
 
     // Load exported torchscript for object detection
     let yolo = YoloV8ObjectDetection::new();
 
     // Predict with non-max-suppression in the end
-    let bboxes = yolo.predict(&image, 0.15, 0.35);
+    let bboxes = yolo.predict(&image, 0.25, 0.7);
     println!("bboxes={:?}", bboxes);
 
     // Draw rectangles around detected objects
@@ -19,9 +19,9 @@ fn object_detection() {
     image.save("images/result2.jpg");
 }
 
-fn image_classification() {
+fn image_classification(path: &str) {
     // Load image to perform image classification
-    let image = Image::new("images/test.jpg", YoloV8Classifier::input_dimension());
+    let image = Image::new(path, YoloV8Classifier::input_dimension());
 
     // Load exported torchscript for object detection
     let yolo = YoloV8Classifier::new();
@@ -39,10 +39,10 @@ fn image_segmentation() {
     let classes = yolo.predict(&image);
 }
 
-// YOLOv8n (nano model) for object detection in image
+// YOLOv8n for object detection in image
 fn main() -> Result<(), TchError> {
-    object_detection();
-    // image_classification();
+    object_detection("images/katri.jpg");
+    // image_classification("images/katri.jpg");
     // image_segmentation();
     Ok(())
 }
diff --git a/images/katri.jpg b/images/katri.jpg
new file mode 100644
index 0000000..9f8f50a
Binary files /dev/null and b/images/katri.jpg differ
diff --git a/src/lib.rs b/src/lib.rs
index fd6d6a6..116bd57 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -98,10 +98,10 @@ impl YoloV8ObjectDetection {
     }
 
     pub fn predict(&self, image: &Image, conf_thresh: f64, iou_thresh: f64) -> Vec<BBox> {
-        println!("predict(): image={:?}", image.scaled_image);
+        // println!("predict(): image={:?}", image.scaled_image);
         let pred = self.yolo.predict(image);
-        println!("pred={:?}", pred);
-        self.non_max_suppression(&pred.get(0), conf_thresh, iou_thresh)
+        // println!("pred={:?}", pred);
+        self.non_max_suppression(image, &pred.get(0), conf_thresh, iou_thresh)
     }
 
     fn iou(&self, b1: &BBox, b2: &BBox) -> f64 {
@@ -117,6 +117,7 @@ impl YoloV8ObjectDetection {
 
     fn non_max_suppression(
         &self,
+        image: &Image,
         prediction: &tch::Tensor,
         conf_thresh: f64,
         iou_thresh: f64,
@@ -143,11 +144,33 @@ impl YoloV8ObjectDetection {
                 //     CLASSES[class_index]
                 // );
 
+                let (_, orig_h, orig_w) = image.image.size3().unwrap();
+                let (_, sh, sw) = image.scaled_image.size3().unwrap();
+                let cx = sw as f64 / 2.0;
+                let cy = sh as f64 / 2.0;
+                let mut dx = pred[0] - cx;
+                let mut dy = pred[1] - cy;
+                let mut w = pred[2];
+                let mut h = pred[3];
+
+                let aspect = orig_w as f64 / orig_h as f64;
+
+                if orig_w > orig_h {
+                    dy *= aspect;
+                    h *= aspect;
+                } else {
+                    dx /= aspect;
+                    w /= aspect;
+                }
+
+                let x = cx + dx;
+                let y = cy + dy;
+
                 let bbox = BBox {
-                    xmin: pred[0] - pred[2] / 2.,
-                    ymin: pred[1] - pred[3] / 2.,
-                    xmax: pred[0] + pred[2] / 2.,
-                    ymax: pred[1] + pred[3] / 2.,
+                    xmin: x - w / 2.,
+                    ymin: y - h / 2.,
+                    xmax: x + w / 2.,
+                    ymax: y + h / 2.,
                     conf: confidence,
                     cls: class_index,
                     name: DETECT_CLASSES[class_index],
@@ -253,16 +276,12 @@ impl YOLOv8 {
     pub fn predict(&self, image: &Image) -> Tensor {
         let img = &image.scaled_image;
 
-        println!("img={:?}", img);
-
         let img = img
             .unsqueeze(0)
             .to_kind(tch::Kind::Float)
             .to_device(self.device)
             .g_div_scalar(255.);
 
-        println!("img_float={:?}", img);
-
         let pred = self
             .model
             .forward_ts(&[img])
@@ -304,15 +323,9 @@ impl Image {
         let width = dimension.0;
         let height = dimension.1;
         let image = tch::vision::image::load(path).expect("can't load image");
-        let scaled_image =
-            tch::vision::image::resize(&image, width, height).expect("can't resize image");
-        utils::print_tensor(&scaled_image);
-        println!("---------------------------------");
         // let scaled_image =
-        //     utils::preprocess(path, dimension.0 as i32, true).expect("image preprocess");
-        let scaled_image = utils::plain_resize(path).expect("XXXXXXXXXXXXXXXXXXXXXx");
-        println!("AHOJ");
-        utils::print_tensor(&scaled_image);
+        //     tch::vision::image::resize(&image, width, height).expect("can't resize image");
+        let scaled_image = utils::preprocess_torch(path, dimension.0 as i32);
         Self {
             width,
             height,
diff --git a/src/utils.rs b/src/utils.rs
index 85c3921..aa39794 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,121 +1,86 @@
-use opencv::{
-    core::{copy_make_border, Vector, BORDER_CONSTANT},
-    imgcodecs::imwrite,
-    prelude::*,
-    Error,
-};
 use tch::Tensor;
 
-pub fn plain_resize(image: &str) -> Result<Tensor, Error> {
-    let img = opencv::imgcodecs::imread(image, opencv::imgcodecs::IMREAD_COLOR)?;
-    let mut result = Mat::zeros(640, 640, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
+pub fn preprocess_torch(path: &str, square_size: i32) -> Tensor {
+    let image = tch::vision::image::load(path).expect("can't load image");
+    let (_, height, width) = image.size3().unwrap();
+    let (uw, uh) = square(square_size, width as i32, height as i32);
+    let scaled_image =
+        tch::vision::image::resize(&image, uw as i64, uh as i64).expect("can't resize image");
+    let scaled_image = Vec::<u8>::try_from(scaled_image.reshape([-1])).expect("vec");
+    let mut gray: Vec<u8> = vec![114; (square_size * square_size * 3) as usize];
+    let dh = (square_size - uh) / 2;
+    let dw = (square_size - uw) / 2;
+    let mut src_y = 0;
+    if uw > uh {
+        for y in dh..dh + uh {
+            let line = get_hline(&scaled_image, (uw as usize, uh as usize), src_y);
+            // println!("line={:?}", line);
+            put_hline(
+                &mut gray,
+                (square_size as usize, square_size as usize),
+                0,
+                y as usize,
+                line,
+            );
+            src_y += 1;
+        }
+    }
+    if uh > uw {
+        for y in 0..square_size {
+            let line = get_hline(&scaled_image, (uw as usize, uh as usize), src_y);
+            // println!("line={:?}", line);
+            put_hline(
+                &mut gray,
+                (square_size as usize, square_size as usize),
+                dw as usize,
+                y as usize,
+                line,
+            );
+            src_y += 1;
+        }
+    }
 
-    opencv::imgproc::resize(
-        &img,
-        &mut result,
-        (640, 640).into(),
-        0.0,
-        0.0,
-        opencv::imgproc::INTER_LINEAR,
-    )?;
-    imwrite("resize.jpg", &result, &Vector::new())?;
-    let t = unsafe {
-        Tensor::from_blob(
-            result.data(),
-            &[640, 640, 3],
-            &[],
-            tch::Kind::Uint8,
-            tch::Device::Cpu,
-        )
-    };
-    let t = t.permute([2, 0, 1]);
-    let t = swap_bgr_to_rgb(t);
-    println!("after: t={:?}", t);
-    tch::vision::image::save(&t, "mezi.jpg").expect("can't save image");
-    Ok(t)
+    let border = Tensor::from_slice(&gray).reshape([3, square_size as i64, square_size as i64]);
+    tch::vision::image::save(&border, "border.jpg").expect("can't save image");
+    border
 }
 
-// Preprocess input image: resize and pad the image
-pub fn preprocess(image: &str, square_size: i32, center: bool) -> Result<Tensor, Error> {
-    let img = opencv::imgcodecs::imread(image, opencv::imgcodecs::IMREAD_COLOR)?;
-    let size = img.size()?;
-    let (width, height) = (size.width, size.height);
-    println!("{width}x{height} -> {square_size}x{square_size}");
-    let (uw, uh) = square(square_size, width, height);
-    println!("{uw}x{uh}");
-    let (mut dw, mut dh) = (square_size - uw, square_size - uh);
-    if center {
-        dw /= 2;
-        dh /= 2;
+fn put_hline(
+    v: &mut Vec<u8>,
+    (w, h): (usize, usize),
+    x_off: usize,
+    y: usize,
+    (r, g, b): (Vec<u8>, Vec<u8>, Vec<u8>),
+) {
+    let r_off = 0;
+    let g_off = w * h;
+    let b_off = 2 * w * h;
+    let mut s_off = y * w;
+    for i in 0..r.len() {
+        // println!("getline: y={y}, i={i}, s_off={s_off} b_off={b_off} idx={idx}");
+        v[r_off + x_off + s_off] = r[i];
+        v[g_off + x_off + s_off] = g[i];
+        v[b_off + x_off + s_off] = b[i];
+        s_off += 1;
     }
-    let (top, bottom) = if center {
-        (
-            (dh as f32 - 0.1).round() as i32,
-            (dh as f32 - 0.1).round() as i32,
-        )
-    } else {
-        (0, (dh as f32 + 0.1).round() as i32)
-    };
-    let (left, right) = if center {
-        (
-            (dw as f32 - 0.1).round() as i32,
-            (dw as f32 - 0.1).round() as i32,
-        )
-    } else {
-        (0, (dw as f32 + 0.1).round() as i32)
-    };
-    let mut result = Mat::zeros(dh, dw, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
-    opencv::imgproc::resize(
-        &img,
-        &mut result,
-        (uw, uh).into(),
-        0.0,
-        0.0,
-        opencv::imgproc::INTER_LINEAR,
-    )?;
-    let mut border = Mat::zeros(square_size, square_size, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
-    copy_make_border(
-        &result,
-        &mut border,
-        top,
-        bottom,
-        left,
-        right,
-        BORDER_CONSTANT,
-        (114, 114, 114).into(),
-    )?;
-    imwrite("resize.jpg", &border, &Vector::new())?;
-    println!("{top},{bottom} -> {left},{right}");
-
-    let t = unsafe {
-        Tensor::from_blob(
-            border.data(),
-            &[640, 640, 3],
-            &[],
-            tch::Kind::Uint8,
-            tch::Device::Cpu,
-        )
-    };
-
-    // im = np.stack(self.pre_transform(im))
-    // im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
-    // im = np.ascontiguousarray(im) # contiguous
-    // im = torch.from_numpy(im)
+}
 
-    println!("before: t={:?}", t);
-    let t = t.permute([2, 0, 1]);
-    let t = swap_bgr_to_rgb(t);
-    println!("after: t={:?}", t);
-    Ok(t)
+fn get_hline(v: &Vec<u8>, (w, h): (usize, usize), y: usize) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
+    let r_off = 0;
+    let g_off = w * h;
+    let b_off = 2 * w * h;
+    let mut s_off = y * w;
+    let mut r = vec![0; w];
+    let mut g = vec![0; w];
+    let mut b = vec![0; w];
+    for i in 0..w {
+        // println!("getline: y={y}, i={i}, s_off={s_off} b_off={b_off} idx={idx}");
+        r[i] = v[r_off + s_off];
+        g[i] = v[g_off + s_off];
+        b[i] = v[b_off + s_off];
+        s_off += 1;
+    }
+    (r, g, b)
 }
 
 fn square(size: i32, w: i32, h: i32) -> (i32, i32) {
@@ -130,51 +95,3 @@ fn square(size: i32, w: i32, h: i32) -> (i32, i32) {
         (tw, th)
     }
 }
-
-fn swap_bgr_to_rgb(img_tensor: Tensor) -> Tensor {
-    // Ensure the input tensor is of the correct shape
-    // Swap channels using indexing
-    // The order [2, 1, 0] corresponds to BGR to RGB
-    let b = img_tensor.narrow_copy(0, 0, 1);
-    img_tensor
-        .narrow(0, 0, 1)
-        .copy_(&img_tensor.narrow(0, 2, 1));
-    img_tensor.narrow(0, 2, 1).copy_(&b);
-    img_tensor
-}
-
-pub fn print_tensor(t: &Tensor) {
-    println!("tensor={}", t);
-}
-
-#[cfg(test)]
-mod test {
-
-    use tch::Tensor;
-
-    use crate::utils::swap_bgr_to_rgb;
-
-    use super::square;
-
-    #[test]
-    fn test_square() {
-        assert_eq!((640, 320), square(640, 1280, 640));
-        assert_eq!((320, 640), square(640, 640, 1280));
-    }
-
-    #[test]
-    fn bgr2rgb() {
-        let t =
-            Tensor::from_slice(&[11, 11, 11, 11, 22, 22, 22, 22, 33, 33, 33, 33]).reshape([3, 4]);
-        println!("t={}", t);
-        let t = swap_bgr_to_rgb(t);
-        // let b = t.narrow(0, 0, 1);
-        // let g = t.narrow(0, 1, 1);
-        // let r = t.narrow(0, 2, 1);
-        // println!("r={}", r);
-        // println!("g={}", g);
-        // println!("b={}", b);
-        // t.narrow_tensor(0, &r, 1);
-        println!("t={}", t);
-    }
-}