Dev (#1)

* Update imageproc crates * Add top-p method for sampling * Add SVTR for text recognition & bug fix
jamjamjon · Apr 6, 2024 · a0d410b · a0d410b
1 parent ce9a416
commit a0d410b
Show file tree

Hide file tree

Showing 48 changed files with 1,592 additions and 961 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -11,30 +11,33 @@ exclude = ["assets/*", "examples/*"]
 
 [dependencies]
 clap = { version = "4.2.4", features = ["derive"] }
-image = { version = "0.24.7", default-features = false, features = [
-    "jpeg", 
-    "png", 
-    "tiff", 
-    "webp", 
-    "webp-encoder",
-    "bmp"
-]}
-imageproc = { version = "0.23.0", default-features = false }
 ndarray = { version = "0.15.6" }
-# ort-sys = { version = "2.0.0-alpha.4" }
-# ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "half", "ndarray", "cuda", "tensorrt", "coreml", "openvino"] }
-ort = { version = "2.0.0-alpha.4", default-features = false, features = ["load-dynamic", "copy-dylibs", "profiling", "half", "ndarray", "cuda", "tensorrt", "coreml", "ureq", "openvino"] }
-rusttype = { version = "0.9", default-features = false }
+ort = { version = "2.0.0-alpha.4", default-features = false, features = [
+    "load-dynamic",
+    "copy-dylibs",
+    "profiling",
+    "half",
+    "ndarray",
+    "cuda",
+    "tensorrt",
+    "coreml",
+    "ureq",
+    "openvino",
+] }
 anyhow = { version = "1.0.75" }
 regex = { version = "1.5.4" }
 rand = { version = "0.8.5" }
 chrono = { version = "0.4.30" }
 half = { version = "2.3.1" }
 dirs = { version = "5.0.1" }
-ureq = { version = "2.9.1", default-features = true, features = [ "socks-proxy" ] }
+ureq = { version = "2.9.1", default-features = true, features = [
+    "socks-proxy",
+] }
 walkdir = { version = "2.5.0" }
 tokenizers = { version = "0.15.2" }
-itertools = { version = "0.12.1" }
-usearch = { version = "2.9.1" }
+usearch = { version = "2.10.4" }
 rayon = "1.10.0"
 indicatif = "0.17.8"
+image = "0.25.1"
+imageproc = { version = "0.24" }
+ab_glyph = "0.2.23"
diff --git a/README.md b/README.md
@@ -4,34 +4,35 @@ A Rust library integrated with **ONNXRuntime**, providing a collection of **Comp
 
 ## Supported Models
 
-|              Model              |         Example         | CUDA<br />f32 | CUDA<br />f16 |     TensorRT<br />f32     |     TensorRT<br />f16     |
-| :-----------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
-|   **YOLOv8-detection**   |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|      **YOLOv8-pose**      |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
-| **YOLOv8-classification** |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|  **YOLOv8-segmentation**  |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|      **YOLOv8-OBB**      |           TODO           |     TODO     |     TODO     |            TODO            |           TODO           |
-|        **YOLOv9**        |   [demo](examples/yolov9)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|        **RT-DETR**        |   [demo](examples/rtdetr)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|        **FastSAM**        |  [demo](examples/fastsam)  |      ✅      |      ✅      |             ✅             |            ✅            |
-|      **YOLO-World**      | [demo](examples/yolo-world) |      ✅      |      ✅      |             ✅             |            ✅            |
-|        **DINOv2**        |   [demo](examples/dinov2)   |      ✅      |      ✅      |             ✅             |            ✅            |
-|         **CLIP**         |    [demo](examples/clip)    |      ✅      |      ✅      | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-|         **BLIP**         |    [demo](examples/blip)    |      ✅      |      ✅      | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-|          [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947)          |     [demo](examples/db)     |      ✅      |      ❌      |             ✅             |            ✅            |
-|        **SVTR, TROCR**        |           TODO           |     TODO     |     TODO     |            TODO            |           TODO           |
+|                               Model                               |         Example         | CUDA<br />f32 | CUDA<br />f16 |     TensorRT<br />f32     |     TensorRT<br />f16     |
+| :---------------------------------------------------------------: | :----------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
+|                    **YOLOv8-detection**                    |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                       **YOLOv8-pose**                       |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                  **YOLOv8-classification**                  |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                   **YOLOv8-segmentation**                   |   [demo](examples/yolov8)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                       **YOLOv8-OBB**                       |           TODO           |     TODO     |     TODO     |            TODO            |           TODO           |
+|                         **YOLOv9**                         |   [demo](examples/yolov9)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                         **RT-DETR**                         |   [demo](examples/rtdetr)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                         **FastSAM**                         |  [demo](examples/fastsam)  |      ✅      |      ✅      |             ✅             |            ✅            |
+|                       **YOLO-World**                       | [demo](examples/yolo-world) |      ✅      |      ✅      |             ✅             |            ✅            |
+|                         **DINOv2**                         |   [demo](examples/dinov2)   |      ✅      |      ✅      |             ✅             |            ✅            |
+|                          **CLIP**                          |    [demo](examples/clip)    |      ✅      |      ✅      | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
+|                          **BLIP**                          |    [demo](examples/blip)    |      ✅      |      ✅      | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
+|   [**DB(Text Detection)**](https://arxiv.org/abs/1911.08947)   |     [demo](examples/db)     |      ✅      |      ❌      |             ✅             |            ✅            |
+| [**SVTR(Text Recognition)**](https://arxiv.org/abs/2205.00159) |    [demo](examples/svtr)    |      ✅      |      ❌      |             ✅             |            ✅            |
 
 ## Solution Models
 
 Additionally, this repo also provides some solution models such as pedestrian `fall detection`, `head detection`, `trash detection`, and more.
 
-|                           Model                           |             Example             |
-| :-------------------------------------------------------: | :------------------------------: |
-| **face-landmark detection**<br />**人脸 & 关键点检测** |    [demo](examples/yolov8-face)    |
-|         **head detection**<br />  **人头检测**         |    [demo](examples/yolov8-head)    |
-|         **fall detection**<br />  **摔倒检测**         |  [demo](examples/yolov8-falldown)  |
-|         **trash detection**<br />  **垃圾检测**         | [demo](examples/yolov8-plastic-bag) |
-| **text detection(PPOCR-det v3, v4)**<br />**PPOCR文本检测** |         [demo](examples/db)         |
+|                                       Model                                       |             Example             |
+| :--------------------------------------------------------------------------------: | :------------------------------: |
+|    **text detection<br />(PPOCR-det v3, v4)**<br />**通用文本检测**    |         [demo](examples/db)         |
+| **text recognition<br />(PPOCR-rec v3, v4)**<br />**中英文-文本识别** |        [demo](examples/svtr)        |
+|         **face-landmark detection**<br />**人脸 & 关键点检测**         |    [demo](examples/yolov8-face)    |
+|                 **head detection**<br />  **人头检测**                 |    [demo](examples/yolov8-head)    |
+|                 **fall detection**<br />  **摔倒检测**                 |  [demo](examples/yolov8-falldown)  |
+|                **trash detection**<br />  **垃圾检测**                | [demo](examples/yolov8-plastic-bag) |
 
 ## Demo
 
@@ -60,27 +61,42 @@ check **[ort guide](https://ort.pyke.io/setup/linking)**
 
 ```shell
 cargo add --git https://github.com/jamjamjon/usls
-
-# or
-cargo add usls 
 ```
 
 #### 3. Set `Options` and build model
 
 ```Rust
 let options = Options::default()
-    .with_model("../models/yolov8m-seg-dyn-f16.onnx")
-    .with_trt(0) // using cuda(0) by default
-// when model with dynamic shapes
-    .with_i00((1, 2, 4).into()) // dynamic batch
-    .with_i02((416, 640, 800).into())   // dynamic height
-    .with_i03((416, 640, 800).into())   // dynamic width
-    .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
-    .with_dry_run(3)
-    .with_saveout("YOLOv8");    // save results
+    .with_model("../models/yolov8m-seg-dyn-f16.onnx");
 let mut model = YOLO::new(&options)?;
 ```
 
+- If you want to run your model with TensorRT or CoreML
+    ```Rust
+    let options = Options::default()
+        .with_trt(0) // using cuda by default
+        // .with_coreml(0) 
+    ```
+
+
+- If your model has dynamic shapes
+    ```Rust
+    let options = Options::default()
+        .with_i00((1, 2, 4).into()) // dynamic batch
+        .with_i02((416, 640, 800).into())   // dynamic height
+        .with_i03((416, 640, 800).into())   // dynamic width
+    ```
+
+- If you want to set a confidence threshold for each category
+    ```Rust
+    let options = Options::default()
+        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
+    ```
+
+- See [Options](src/options.rs) for more model options.
+
+
+
 #### 4. Prepare inputs, and then you're ready to go
 
 - Build `DataLoader` to load images
@@ -98,10 +114,17 @@ for (xs, _paths) in dl {
 - Or simply read one image
 
 ```Rust
-let x = DataLoader::try_read("./assets/bus.jpg")?;
-let _y = model.run(&[x])?;
+let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
+let y = model.run(&x)?;
+```
+
+#### 5. Annotate and save results
+```Rust
+let annotator = Annotator::default().with_saveout("YOLOv8");
+annotator.annotate(&x, &y);
 ```
 
+
 ## Script: convert ONNX model from `float32` to `float16`
 
 ```python

diff --git a/assets/db.png b/assets/db.png
diff --git a/examples/assets/bus.jpg b/examples/assets/bus.jpg
diff --git a/examples/assets/falldown.jpg b/examples/assets/falldown.jpg
diff --git a/examples/assets/kids.jpg b/examples/assets/kids.jpg
diff --git a/assets/math.jpg → examples/assets/math.jpg b/assets/math.jpg → examples/assets/math.jpg
diff --git a/examples/assets/trash.jpg b/examples/assets/trash.jpg
diff --git a/examples/clip/README.md b/examples/clip/README.md
@@ -42,14 +42,14 @@ cargo run -r --example clip
 ## Results
 
 ```shell
-(82.24775%) ./examples/clip/images/carrot.jpg => 几个胡萝卜 
-[0.06708972, 0.0067733657, 0.0019306632, 0.8224775, 0.003044935, 0.083962336, 0.014721389]
+(90.11472%) ./examples/clip/images/carrot.jpg => 几个胡萝卜 
+[0.04573484, 0.0048218793, 0.0011618224, 0.90114725, 0.0036694852, 0.031348046, 0.0121166315]
 
-(85.56889%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table 
-[0.0786363, 0.0004783095, 0.00060898095, 0.06286741, 0.0006842306, 0.8556889, 0.0010357979]
+(94.07785%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant 
+[0.050406333, 0.0011632168, 0.0019338318, 0.0013227565, 0.003916758, 0.00047858112, 0.9407785]
 
-(90.03625%) ./examples/clip/images/peoples.jpg => Some people holding wine glasses in a restaurant 
-[0.07473288, 0.0027821448, 0.0075673857, 0.010874652, 0.003041679, 0.0006387719, 0.9003625]
+(86.59852%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table 
+[0.07032883, 0.00053773675, 0.0006372929, 0.06066096, 0.0007378078, 0.8659852, 0.0011121632]
 ```
 
 

diff --git a/examples/db/README.md b/examples/db/README.md
@@ -16,7 +16,6 @@ cargo run -r --example db
 ```Rust
 let options = Options::default()
     .with_model("ONNX_PATH")    // <= modify this
-    .with_profile(false);
 ```
 
 ### 3. Run
@@ -27,10 +26,10 @@ cargo run -r --example db
 
 ### Speed test
 
-| Model           | Image size | TensorRT<br />f16 | TensorRT<br />f32 | CUDA<br />f32 |
-| --------------- | ---------- | ----------------- | ----------------- | ------------- |
-| ppocr-v3-db-dyn | 640x640    | 1.8585ms          | 2.5739ms          | 4.3314ms      |
-| ppocr-v4-db-dyn | 640x640    | 2.0507ms          | 2.8264ms          | 6.6064ms      |
+| Model           | Image size | TensorRT<br />f16<br />batch=1<br />(ms) | TensorRT<br />f32<br />batch=1<br />(ms) | CUDA<br />f32<br />batch=1<br />(ms) |
+| --------------- | ---------- | ---------------------------------------- | ---------------------------------------- | ------------------------------------ |
+| ppocr-v3-db-dyn | 640x640    | 1.8585                                   | 2.5739                                   | 4.3314                               |
+| ppocr-v4-db-dyn | 640x640    | 2.0507                                   | 2.8264                                   | 6.6064                               |
 
 ***Test on RTX3060***
 

diff --git a/examples/db/demo.jpg b/examples/db/demo.jpg
diff --git a/examples/db/main.rs b/examples/db/main.rs
@@ -1,25 +1,33 @@
-use usls::{models::DB, DataLoader, Options};
+use usls::{models::DB, Annotator, DataLoader, Options};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
     let options = Options::default()
-        .with_model("../models/ppocr-v4-db-dyn.onnx")
-        .with_i00((1, 1, 4).into())
-        .with_i02((608, 640, 960).into())
-        .with_i03((608, 640, 960).into())
-        .with_confs(&[0.7])
-        .with_saveout("DB-Text-Detection")
-        .with_dry_run(5)
+        .with_i00((1, 4, 8).into())
+        .with_i02((608, 960, 1280).into())
+        .with_i03((608, 960, 1280).into())
+        .with_confs(&[0.4])
+        .with_min_width(5.0)
+        .with_min_height(12.0)
         // .with_trt(0)
-        // .with_fp16(true)
-        .with_profile(true);
+        .with_model("../models/ppocr-v4-db-dyn.onnx");
+
     let mut model = DB::new(&options)?;
 
     // load image
-    let x = DataLoader::try_read("./assets/math.jpg")?;
+    let x = vec![DataLoader::try_read("./assets/db.png")?];
 
     // run
-    let _y = model.run(&[x])?;
+    let y = model.run(&x)?;
+
+    // annotate
+    let annotator = Annotator::default()
+        .with_polygon_color([255u8, 0u8, 0u8])
+        .without_name(true)
+        .without_polygons(false)
+        .without_bboxes(false)
+        .with_saveout("DB-Text-Detection");
+    annotator.annotate(&x, &y);
 
     Ok(())
 }
diff --git a/examples/dinov2/main.rs b/examples/dinov2/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::Dinov2, Metric, Options};
+use usls::{models::Dinov2, Options};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
@@ -8,31 +8,32 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .with_i00((1, 1, 1).into())
         .with_i02((224, 224, 224).into())
         .with_i03((224, 224, 224).into());
-    let mut model = Dinov2::new(&options)?;
+    let _model = Dinov2::new(&options)?;
+    println!("TODO...");
 
     // query from vector
-    let ys = model.query_from_vec(
-        "./assets/bus.jpg",
-        &[
-            "./examples/dinov2/images/bus.jpg",
-            "./examples/dinov2/images/1.jpg",
-            "./examples/dinov2/images/2.jpg",
-        ],
-        Metric::L2,
-    )?;
+    // let ys = model.query_from_vec(
+    //     "./assets/bus.jpg",
+    //     &[
+    //         "./examples/dinov2/images/bus.jpg",
+    //         "./examples/dinov2/images/1.jpg",
+    //         "./examples/dinov2/images/2.jpg",
+    //     ],
+    //     Metric::L2,
+    // )?;
 
     // or query from folder
     // let ys = model.query_from_folder("./assets/bus.jpg", "./examples/dinov2/images", Metric::IP)?;
 
     // results
-    for (i, y) in ys.iter().enumerate() {
-        println!(
-            "Top-{:<3}{:.7} {}",
-            i + 1,
-            y.1,
-            y.2.canonicalize()?.display()
-        );
-    }
+    // for (i, y) in ys.iter().enumerate() {
+    //     println!(
+    //         "Top-{:<3}{:.7} {}",
+    //         i + 1,
+    //         y.1,
+    //         y.2.canonicalize()?.display()
+    //     );
+    // }
 
     Ok(())
 }
diff --git a/examples/fastsam/main.rs b/examples/fastsam/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::YOLO, DataLoader, Options};
+use usls::{models::YOLO, Annotator, DataLoader, Options};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
@@ -7,16 +7,18 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .with_i00((1, 1, 4).into())
         .with_i02((416, 640, 800).into())
         .with_i03((416, 640, 800).into())
-        .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
-        .with_saveout("FastSAM")
-        .with_profile(false);
+        .with_confs(&[0.4]);
     let mut model = YOLO::new(&options)?;
 
-    // build dataloader
-    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
+    // load image
+    let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
 
     // run
-    model.run(&dl.next().unwrap().0)?;
+    let y = model.run(&x)?;
+
+    // annotate
+    let annotator = Annotator::default().with_saveout("FastSAM");
+    annotator.annotate(&x, &y);
 
     Ok(())
 }
diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs
@@ -1,19 +1,22 @@
-use usls::{models::RTDETR, DataLoader, Options, COCO_NAMES_80};
+use usls::{models::RTDETR, Annotator, DataLoader, Options, COCO_NAMES_80};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
     let options = Options::default()
         .with_model("../models/rtdetr-l-f16.onnx")
         .with_confs(&[0.4, 0.15]) // person: 0.4, others: 0.15
-        .with_names(&COCO_NAMES_80)
-        .with_saveout("RT-DETR");
+        .with_names(&COCO_NAMES_80);
     let mut model = RTDETR::new(&options)?;
 
-    // build dataloader
-    let mut dl = DataLoader::default().load("./assets/bus.jpg")?;
+    // load image
+    let x = vec![DataLoader::try_read("./assets/bus.jpg")?];
 
     // run
-    model.run(&dl.next().unwrap().0)?;
+    let y = model.run(&x)?;
+
+    // annotate
+    let annotator = Annotator::default().with_saveout("RT-DETR");
+    annotator.annotate(&x, &y);
 
     Ok(())
 }