diff --git a/Cargo.toml b/Cargo.toml
index 99ee863..9598b3d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,6 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-opencv = "0.92.2"
 tch = "0.17.0"
 
 [[example]]
diff --git a/code b/code
index ccd9c57..036ff09 100755
--- a/code
+++ b/code
@@ -4,4 +4,4 @@ export LIBTORCH_LIB=$(pwd)/libtorch/
 
 export LD_LIBRARY_PATH="$LIBTORCH/lib/:$LD_LIBRARY_PATH"
 
-code
+code .
diff --git a/examples/predict/main.rs b/examples/predict/main.rs
index d4b4ede..c033a2f 100644
--- a/examples/predict/main.rs
+++ b/examples/predict/main.rs
@@ -1,16 +1,16 @@
 use tch::TchError;
 use yolo_v8::{Image, YoloV8Classifier, YoloV8ObjectDetection, YoloV8Segmentation};
 
-fn object_detection() {
+fn object_detection(path: &str) {
     // Load image to perform object detection, note that YOLOv8 resolution must match
     // scaling width and height here
-    let mut image = Image::new("images/bus.jpg", YoloV8ObjectDetection::input_dimension());
+    let mut image = Image::new(path, YoloV8ObjectDetection::input_dimension());
 
     // Load exported torchscript for object detection
     let yolo = YoloV8ObjectDetection::new();
 
     // Predict with non-max-suppression in the end
-    let bboxes = yolo.predict(&image, 0.15, 0.35);
+    let bboxes = yolo.predict(&image, 0.25, 0.7);
     println!("bboxes={:?}", bboxes);
 
     // Draw rectangles around detected objects
@@ -19,9 +19,9 @@ fn object_detection() {
     image.save("images/result2.jpg");
 }
 
-fn image_classification() {
+fn image_classification(path: &str) {
     // Load image to perform image classification
-    let image = Image::new("images/test.jpg", YoloV8Classifier::input_dimension());
+    let image = Image::new(path, YoloV8Classifier::input_dimension());
 
     // Load exported torchscript for object detection
     let yolo = YoloV8Classifier::new();
@@ -39,10 +39,10 @@ fn image_segmentation() {
     let classes = yolo.predict(&image);
 }
 
-// YOLOv8n (nano model) for object detection in image
+// YOLOv8n object detection on the image given as first CLI argument (default: katri.jpg)
 fn main() -> Result<(), TchError> {
-    object_detection();
-    // image_classification();
+    let path = std::env::args().nth(1).unwrap_or_else(|| "images/katri.jpg".to_string());
+    object_detection(&path); // or: image_classification(&path);
     // image_segmentation();
     Ok(())
 }
diff --git a/images/katri.jpg b/images/katri.jpg
new file mode 100644
index 0000000..9f8f50a
Binary files /dev/null and b/images/katri.jpg differ
diff --git a/src/lib.rs b/src/lib.rs
index fd6d6a6..116bd57 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -98,10 +98,10 @@ impl YoloV8ObjectDetection {
     }
 
     pub fn predict(&self, image: &Image, conf_thresh: f64, iou_thresh: f64) -> Vec<BBox> {
-        println!("predict(): image={:?}", image.scaled_image);
+        // println!("predict(): image={:?}", image.scaled_image);
         let pred = self.yolo.predict(image);
-        println!("pred={:?}", pred);
-        self.non_max_suppression(&pred.get(0), conf_thresh, iou_thresh)
+        // println!("pred={:?}", pred);
+        self.non_max_suppression(image, &pred.get(0), conf_thresh, iou_thresh)
     }
 
     fn iou(&self, b1: &BBox, b2: &BBox) -> f64 {
@@ -117,6 +117,7 @@ impl YoloV8ObjectDetection {
 
     fn non_max_suppression(
         &self,
+        image: &Image,
         prediction: &tch::Tensor,
         conf_thresh: f64,
         iou_thresh: f64,
@@ -143,11 +144,33 @@ impl YoloV8ObjectDetection {
                     //     CLASSES[class_index]
                     // );
 
+                    let (_, orig_h, orig_w) = image.image.size3().unwrap();
+                    let (_, sh, sw) = image.scaled_image.size3().unwrap();
+                    let cx = sw as f64 / 2.0;
+                    let cy = sh as f64 / 2.0;
+                    let mut dx = pred[0] - cx;
+                    let mut dy = pred[1] - cy;
+                    let mut w = pred[2];
+                    let mut h = pred[3];
+
+                    let aspect = orig_w as f64 / orig_h as f64;
+
+                    if orig_w > orig_h {
+                        dy *= aspect;
+                        h *= aspect;
+                    } else {
+                        dx /= aspect;
+                        w /= aspect;
+                    }
+
+                    let x = cx + dx;
+                    let y = cy + dy;
+
                     let bbox = BBox {
-                        xmin: pred[0] - pred[2] / 2.,
-                        ymin: pred[1] - pred[3] / 2.,
-                        xmax: pred[0] + pred[2] / 2.,
-                        ymax: pred[1] + pred[3] / 2.,
+                        xmin: x - w / 2.,
+                        ymin: y - h / 2.,
+                        xmax: x + w / 2.,
+                        ymax: y + h / 2.,
                         conf: confidence,
                         cls: class_index,
                         name: DETECT_CLASSES[class_index],
@@ -253,16 +276,12 @@ impl YOLOv8 {
     pub fn predict(&self, image: &Image) -> Tensor {
         let img = &image.scaled_image;
 
-        println!("img={:?}", img);
-
         let img = img
             .unsqueeze(0)
             .to_kind(tch::Kind::Float)
             .to_device(self.device)
             .g_div_scalar(255.);
 
-        println!("img_float={:?}", img);
-
         let pred = self
             .model
             .forward_ts(&[img])
@@ -304,15 +323,9 @@ impl Image {
         let width = dimension.0;
         let height = dimension.1;
         let image = tch::vision::image::load(path).expect("can't load image");
-        let scaled_image =
-            tch::vision::image::resize(&image, width, height).expect("can't resize image");
-        utils::print_tensor(&scaled_image);
-        println!("---------------------------------");
         // let scaled_image =
-        //     utils::preprocess(path, dimension.0 as i32, true).expect("image preprocess");
-        let scaled_image = utils::plain_resize(path).expect("XXXXXXXXXXXXXXXXXXXXXx");
-        println!("AHOJ");
-        utils::print_tensor(&scaled_image);
+        //     tch::vision::image::resize(&image, width, height).expect("can't resize image");
+        let scaled_image = utils::preprocess_torch(path, dimension.0 as i32);
         Self {
             width,
             height,
diff --git a/src/utils.rs b/src/utils.rs
index 85c3921..aa39794 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -1,121 +1,86 @@
-use opencv::{
-    core::{copy_make_border, Vector, BORDER_CONSTANT},
-    imgcodecs::imwrite,
-    prelude::*,
-    Error,
-};
 use tch::Tensor;
 
-pub fn plain_resize(image: &str) -> Result<Tensor, Error> {
-    let img = opencv::imgcodecs::imread(image, opencv::imgcodecs::IMREAD_COLOR)?;
-    let mut result = Mat::zeros(640, 640, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
+/// Letterboxes the image at `path` into a `square_size` x `square_size` channel-first tensor.
+///
+/// The file is decoded with `tch::vision::image::load` and resized to the dimensions
+/// returned by `square`. The resized pixels are then copied row by row (`get_hline` /
+/// `put_hline`) into the centre of a buffer pre-filled with the value 114, so everything
+/// not covered by the image keeps that fill value.
+///
+/// Returns a `u8` tensor of shape `[3, square_size, square_size]`.
+///
+/// # Panics
+///
+/// Panics if the image cannot be loaded or resized, or if the resized image does not fit
+/// inside the square.
+pub fn preprocess_torch(path: &str, square_size: i32) -> Tensor {
+    let image = tch::vision::image::load(path).expect("can't load image");
+    let (_, height, width) = image.size3().unwrap();
+    let (uw, uh) = square(square_size, width as i32, height as i32);
+    let scaled_image =
+        tch::vision::image::resize(&image, uw as i64, uh as i64).expect("can't resize image");
+    let scaled_image = Vec::<u8>::try_from(scaled_image.reshape([-1])).expect("vec");
+
+    assert!(
+        (0..=square_size).contains(&uw) && (0..=square_size).contains(&uh),
+        "resized image {uw}x{uh} does not fit into {square_size}x{square_size}"
+    );
+    let side = square_size as usize;
+    let src_dim = (uw as usize, uh as usize);
+    // Offsets that centre the resized image inside the square.
+    let dw = ((square_size - uw) / 2) as usize;
+    let dh = ((square_size - uh) / 2) as usize;
+
+    // Copy every source row into the padded buffer. Using both offsets unconditionally also
+    // covers square inputs (uw == uh), which separate wide/tall branches would skip and
+    // thereby leave the model input entirely gray.
+    let mut gray: Vec<u8> = vec![114; side * side * 3];
+    for src_y in 0..src_dim.1 {
+        let line = get_hline(&scaled_image, src_dim, src_y);
+        put_hline(&mut gray, (side, side), dw, dh + src_y, line);
+    }
 
-    opencv::imgproc::resize(
-        &img,
-        &mut result,
-        (640, 640).into(),
-        0.0,
-        0.0,
-        opencv::imgproc::INTER_LINEAR,
-    )?;
-    imwrite("resize.jpg", &result, &Vector::new())?;
-    let t = unsafe {
-        Tensor::from_blob(
-            result.data(),
-            &[640, 640, 3],
-            &[],
-            tch::Kind::Uint8,
-            tch::Device::Cpu,
-        )
-    };
-    let t = t.permute([2, 0, 1]);
-    let t = swap_bgr_to_rgb(t);
-    println!("after: t={:?}", t);
-    tch::vision::image::save(&t, "mezi.jpg").expect("can't save image");
-    Ok(t)
+    // No debug copy is written to disk here: library code should not create files in the
+    // working directory or panic when that directory is read-only.
+    Tensor::from_slice(&gray).reshape([3, square_size as i64, square_size as i64])
 }
 
-// Preprocess input image: resize and pad the image
-pub fn preprocess(image: &str, square_size: i32, center: bool) -> Result<Tensor, Error> {
-    let img = opencv::imgcodecs::imread(image, opencv::imgcodecs::IMREAD_COLOR)?;
-    let size = img.size()?;
-    let (width, height) = (size.width, size.height);
-    println!("{width}x{height} -> {square_size}x{square_size}");
-    let (uw, uh) = square(square_size, width, height);
-    println!("{uw}x{uh}");
-    let (mut dw, mut dh) = (square_size - uw, square_size - uh);
-    if center {
-        dw /= 2;
-        dh /= 2;
+/// Copies one pixel row into the channel-first buffer `v`.
+///
+/// `v` is indexed as three consecutive `w * h` planes; `r`, `g` and `b` are written to row
+/// `y` of the first, second and third plane, starting at column `x_off`.
+///
+/// Panics if a destination range lies outside `v`.
+fn put_hline(
+    v: &mut [u8],
+    (w, h): (usize, usize),
+    x_off: usize,
+    y: usize,
+    (r, g, b): (Vec<u8>, Vec<u8>, Vec<u8>),
+) {
+    let row_start = y * w + x_off;
+    for (plane, line) in [r, g, b].iter().enumerate() {
+        let dst = plane * w * h + row_start;
+        v[dst..dst + line.len()].copy_from_slice(line);
     }
-    let (top, bottom) = if center {
-        (
-            (dh as f32 - 0.1).round() as i32,
-            (dh as f32 - 0.1).round() as i32,
-        )
-    } else {
-        (0, (dh as f32 + 0.1).round() as i32)
-    };
-    let (left, right) = if center {
-        (
-            (dw as f32 - 0.1).round() as i32,
-            (dw as f32 - 0.1).round() as i32,
-        )
-    } else {
-        (0, (dw as f32 + 0.1).round() as i32)
-    };
-    let mut result = Mat::zeros(dh, dw, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
-    opencv::imgproc::resize(
-        &img,
-        &mut result,
-        (uw, uh).into(),
-        0.0,
-        0.0,
-        opencv::imgproc::INTER_LINEAR,
-    )?;
-    let mut border = Mat::zeros(square_size, square_size, opencv::core::CV_8UC3)
-        .unwrap()
-        .to_mat()
-        .unwrap();
-    copy_make_border(
-        &result,
-        &mut border,
-        top,
-        bottom,
-        left,
-        right,
-        BORDER_CONSTANT,
-        (114, 114, 114).into(),
-    )?;
-    imwrite("resize.jpg", &border, &Vector::new())?;
-    println!("{top},{bottom} -> {left},{right}");
-
-    let t = unsafe {
-        Tensor::from_blob(
-            border.data(),
-            &[640, 640, 3],
-            &[],
-            tch::Kind::Uint8,
-            tch::Device::Cpu,
-        )
-    };
-
-    //       im = np.stack(self.pre_transform(im))
-    // im = im[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW, (n, 3, h, w)
-    // im = np.ascontiguousarray(im)  # contiguous
-    // im = torch.from_numpy(im)
+}
 
-    println!("before: t={:?}", t);
-    let t = t.permute([2, 0, 1]);
-    let t = swap_bgr_to_rgb(t);
-    println!("after: t={:?}", t);
-    Ok(t)
+/// Extracts row `y` from each of the three planes of the channel-first buffer `v`.
+///
+/// `v` is indexed as three consecutive `w * h` planes. The result holds `w` bytes per plane,
+/// in plane order.
+///
+/// # Panics
+///
+/// Panics if the requested row lies outside `v`.
+fn get_hline(v: &[u8], (w, h): (usize, usize), y: usize) -> (Vec<u8>, Vec<u8>, Vec<u8>) {
+    let plane_len = w * h;
+    let row_start = y * w;
+    let row = |plane: usize| {
+        let src = plane * plane_len + row_start;
+        v[src..src + w].to_vec()
+    };
+    (row(0), row(1), row(2))
 }
 
 fn square(size: i32, w: i32, h: i32) -> (i32, i32) {
@@ -130,51 +95,3 @@ fn square(size: i32, w: i32, h: i32) -> (i32, i32) {
         (tw, th)
     }
 }
-
-fn swap_bgr_to_rgb(img_tensor: Tensor) -> Tensor {
-    // Ensure the input tensor is of the correct shape
-    // Swap channels using indexing
-    // The order [2, 1, 0] corresponds to BGR to RGB
-    let b = img_tensor.narrow_copy(0, 0, 1);
-    img_tensor
-        .narrow(0, 0, 1)
-        .copy_(&img_tensor.narrow(0, 2, 1));
-    img_tensor.narrow(0, 2, 1).copy_(&b);
-    img_tensor
-}
-
-pub fn print_tensor(t: &Tensor) {
-    println!("tensor={}", t);
-}
-
-#[cfg(test)]
-mod test {
-
-    use tch::Tensor;
-
-    use crate::utils::swap_bgr_to_rgb;
-
-    use super::square;
-
-    #[test]
-    fn test_square() {
-        assert_eq!((640, 320), square(640, 1280, 640));
-        assert_eq!((320, 640), square(640, 640, 1280));
-    }
-
-    #[test]
-    fn bgr2rgb() {
-        let t =
-            Tensor::from_slice(&[11, 11, 11, 11, 22, 22, 22, 22, 33, 33, 33, 33]).reshape([3, 4]);
-        println!("t={}", t);
-        let t = swap_bgr_to_rgb(t);
-        // let b = t.narrow(0, 0, 1);
-        // let g = t.narrow(0, 1, 1);
-        // let r = t.narrow(0, 2, 1);
-        // println!("r={}", r);
-        // println!("g={}", g);
-        // println!("b={}", b);
-        // t.narrow_tensor(0, &r, 1);
-        println!("t={}", t);
-    }
-}