Add query method for dinov2 and adjust DataLoader
jamjamjon committed Mar 30, 2024
1 parent af93408 commit a5cee66
Showing 12 changed files with 249 additions and 147 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
@@ -1,12 +1,13 @@
[package]
name = "usls"
version = "0.0.2"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
authors = ["Jamjamjon <[email protected]>"]
license = "MIT"
readme = "README.md"
exclude = ["assets/*", "examples/*"]

[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
120 changes: 65 additions & 55 deletions README.md
@@ -2,34 +2,34 @@

A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vision** and **Vision-Language** models including [YOLOv8](https://github.com/ultralytics/ultralytics) `(Classification, Segmentation, Detection and Pose Detection)`, [YOLOv9](https://github.com/WongKinYiu/yolov9), [RTDETR](https://arxiv.org/abs/2304.08069), [CLIP](https://github.com/openai/CLIP), [DINOv2](https://github.com/facebookresearch/dinov2), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [BLIP](https://arxiv.org/abs/2201.12086), and others. Many execution providers are supported, such as `CUDA`, `TensorRT` and `CoreML`.


## Supported Models

| Model | Example | CUDA(f32) | CUDA(f16) | TensorRT(f32) | TensorRT(f16) |
| :-------------------: | :----------------------: | :----------------: | :----------------: | :------------------------: | :-----------------------: |
| YOLOv8-detection | [demo](examples/yolov8) |||||
| YOLOv8-pose | [demo](examples/yolov8) |||||
| YOLOv8-classification | [demo](examples/yolov8) |||||
| YOLOv8-segmentation | [demo](examples/yolov8) |||||
| YOLOv8-OBB | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** |
| YOLOv9 | [demo](examples/yolov9) |||||
| RT-DETR | [demo](examples/rtdetr) |||||
| FastSAM | [demo](examples/fastsam) |||||
| YOLO-World | [demo](examples/yolo-world) |||||
| DINOv2 | [demo](examples/dinov2) |||||
| CLIP | [demo](examples/clip) ||| ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| BLIP | [demo](examples/blip) ||| ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
| OCR(DB, SVTR) | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** | ***TODO*** |

## Solution Models

This repo also provides solution models for tasks such as pedestrian `fall detection`, `head detection`, `trash detection`, and more.

| Model | Example |
| :---------------------: | :------------------------------: |
| face-landmark detection | [demo](examples/yolov8-face) |
| head detection | [demo](examples/yolov8-head) |
| fall detection | [demo](examples/yolov8-falldown) |
| trash detection | [demo](examples/yolov8-plastic-bag) |

## Demo

@@ -44,53 +44,63 @@ cargo run -r --example yolov8   # fastsam, yolov9, blip, clip, dinov2, yolo-world
Check the **[ort guide](https://ort.pyke.io/setup/linking)**.

<details close>
<summary>For Linux or MacOS users</summary>

- First, download the latest release from [ONNXRuntime Releases](https://github.com/microsoft/onnxruntime/releases)
- Then link it:
```shell
export ORT_DYLIB_PATH=/Users/qweasd/Desktop/onnxruntime-osx-arm64-1.17.1/lib/libonnxruntime.1.17.1.dylib
```

</details>

#### 2. Add `usls` as a dependency to your project's `Cargo.toml`

```shell
cargo add --git https://github.com/jamjamjon/usls

# or
cargo add usls
```


#### 3. Set `Options` and build model
```Rust
let options = Options::default()
.with_model("../models/yolov8m-seg-dyn-f16.onnx")
    .with_trt(0) // use TensorRT on device 0 (otherwise CUDA(0) is the default)
// when model with dynamic shapes
.with_i00((1, 2, 4).into()) // dynamic batch
.with_i02((416, 640, 800).into()) // dynamic height
.with_i03((416, 640, 800).into()) // dynamic width
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_dry_run(3)
.with_saveout("YOLOv8"); // save results
let mut model = YOLO::new(&options)?;
```
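The `(1, 2, 4)`-style triples passed to `with_i00`/`with_i02`/`with_i03` describe a dynamic axis as `(min, opt, max)`. As a minimal sketch of that convention — the `MinOptMax` name and `From` impl below are assumptions for illustration, not the crate's actual types:

```rust
// Hypothetical model of a dynamic-axis range: (min, opt, max).
// The real `usls` type may differ; this only illustrates the convention.
#[derive(Debug, PartialEq)]
struct MinOptMax {
    min: usize,
    opt: usize,
    max: usize,
}

impl From<(usize, usize, usize)> for MinOptMax {
    fn from((min, opt, max): (usize, usize, usize)) -> Self {
        Self { min, opt, max }
    }
}

fn main() {
    // `(1, 2, 4)` reads as: at least 1 image, optimized for 2, at most 4.
    let batch: MinOptMax = (1, 2, 4).into();
    assert_eq!(batch.opt, 2);
    println!("batch axis: {:?}", batch);
}
```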

#### 4. Prepare inputs, and then you're ready to go

- Build `DataLoader` to load images

```Rust
let dl = DataLoader::default()
.with_batch(model.batch.opt as usize)
.load("./assets/")?;

for (xs, _paths) in dl {
let _y = model.run(&xs)?;
}
```
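Conceptually, the batching done by `with_batch` can be pictured as slice chunking: each iteration of the loop above receives up to `batch` images at once. A sketch of that idea with plain paths (the real `DataLoader` also decodes the images; the file names here are hypothetical):

```rust
fn main() {
    // Hypothetical stand-ins for image paths found in a folder.
    let paths = ["a.jpg", "b.jpg", "c.jpg", "d.jpg", "e.jpg"];
    let batch = 2;

    // With `with_batch(2)`, iteration would yield groups like these;
    // the last batch may be smaller than `batch`.
    for chunk in paths.chunks(batch) {
        println!("batch of {}: {:?}", chunk.len(), chunk);
    }
}
```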

- Or simply read one image

```Rust
let x = DataLoader::try_read("./assets/bus.jpg")?;
let _y = model.run(&[x])?;
```


## Script: convert an ONNX model from `float32` to `float16`

```python
# (script body not fully shown in this view)
```
17 changes: 4 additions & 13 deletions examples/dinov2/README.md
Expand Up @@ -16,22 +16,14 @@ cargo run -r --example dinov2

[dinov2-b14](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14.onnx)
[dinov2-b14-dyn](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn.onnx)
[dinov2-b14-dyn-f16](https://github.com/jamjamjon/assets/releases/download/v0.0.1/dinov2-b14-dyn-f16.onnx)


### 2. Specify the ONNX model path in `main.rs`

```Rust
let options = Options::default()
.with_model("ONNX_PATH") // <= modify this
.with_profile(false);

```

### 3. Then, run
@@ -43,8 +43,7 @@ cargo run -r --example dinov2
## Results

```shell
Top-1 0.0000000 /home/qweasd/Desktop/usls/examples/dinov2/images/bus.jpg
Top-2 1.9059424 /home/qweasd/Desktop/usls/examples/dinov2/images/1.jpg
Top-3 1.9736203 /home/qweasd/Desktop/usls/examples/dinov2/images/2.jpg
```
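The scores in this listing are distances between DINOv2 embeddings (0 for the query matched against itself). Assuming `Metric::L2` means plain Euclidean distance over `f32` feature vectors, a minimal sketch:

```rust
// Euclidean (L2) distance between two embedding vectors.
fn l2(a: &[f32], b: &[f32]) -> f32 {
    a.iter()
        .zip(b)
        .map(|(x, y)| (x - y) * (x - y))
        .sum::<f32>()
        .sqrt()
}

fn main() {
    // Toy 3-dimensional "embeddings"; real DINOv2 features are much longer.
    let query = [1.0_f32, 0.0, 0.0];
    let gallery = [[1.0_f32, 0.0, 0.0], [0.0, 1.0, 0.0]];
    for (i, g) in gallery.iter().enumerate() {
        println!("Top-{} {:.7}", i + 1, l2(&query, g));
    }
}
```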
File renamed without changes
File renamed without changes
59 changes: 20 additions & 39 deletions examples/dinov2/main.rs
@@ -1,55 +1,36 @@
use usls::{models::Dinov2, Metric, Options};

fn main() -> Result<(), Box<dyn std::error::Error>> {
// build model
let options = Options::default()
.with_model("../models/dinov2-s14-dyn-f16.onnx")
// .with_model("../models/dinov2-b14-dyn.onnx")
.with_i00((1, 1, 1).into())
.with_i02((224, 224, 224).into())
.with_i03((224, 224, 224).into());
let mut model = Dinov2::new(&options)?;

// query from vector
let ys = model.query_from_vec(
"./assets/bus.jpg",
&[
"./examples/dinov2/images/bus.jpg",
"./examples/dinov2/images/1.jpg",
"./examples/dinov2/images/2.jpg",
],
Metric::L2,
)?;

// or query from folder
// let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;

// results
for (i, y) in ys.iter().enumerate() {
println!(
"Top-{:<3}{:.7} {}",
i + 1,
y.1,
y.2.canonicalize()?.display()
);
}

6 changes: 3 additions & 3 deletions examples/yolov8-face/main.rs
@@ -12,11 +12,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.with_profile(false);
let mut model = YOLO::new(&options)?;

// load image
let x = DataLoader::try_read("./assets/kids.jpg")?;

// run
let _y = model.run(&[x])?;

Ok(())
}
12 changes: 5 additions & 7 deletions examples/yolov8/main.rs
@@ -4,22 +4,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// 1.build model
let options = Options::default()
.with_model("../models/yolov8m-dyn-f16.onnx")
// .with_trt(0) // cuda by default
// .with_fp16(true)
.with_i00((1, 1, 4).into())
.with_i02((416, 640, 800).into())
.with_i03((416, 640, 800).into())
.with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
.with_profile(false)
.with_dry_run(3)
.with_skeletons(&COCO_SKELETON_17)
.with_saveout("YOLOv8");
let mut model = YOLO::new(&options)?;

// 2.build dataloader
let dl = DataLoader::default().with_batch(1).load("./assets")?;

// 3.run
for (xs, _paths) in dl {
