diff --git a/Cargo.toml b/Cargo.toml
index 15aa2e9..76cc2d6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,7 @@
[package]
name = "usls"
version = "0.0.21"
+rust-version = "1.79"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
@@ -9,7 +10,6 @@ license = "MIT"
readme = "README.md"
exclude = ["assets/*", "examples/*", "runs/*", "benches/*"]
-
[dependencies]
aksr = { version = "0.0.2" }
image = { version = "0.25.2" }
@@ -22,7 +22,6 @@ rand = { version = "0.8.5" }
chrono = { version = "0.4.30" }
tokenizers = { version = "0.15.2" }
log = { version = "0.4.22" }
-env_logger = { version = "0.11.5" }
indicatif = "0.17.8"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
@@ -42,19 +41,15 @@ video-rs = { version = "0.10.0", features = ["ndarray"], optional = true }
minifb = { version = "0.27.0", optional = true }
argh = "0.1.13"
-
[dev-dependencies]
+env_logger = { version = "0.11.5" }
tracing-subscriber = { version = "0.3.18" }
tracing = { version = "0.1.40", features = ["log"] }
-
-
[[example]]
name = "viewer"
required-features = ["ffmpeg"]
-
-
[features]
default = [
"ort/ndarray",
@@ -68,7 +63,6 @@ cuda = [ "ort/cuda" ]
trt = [ "ort/tensorrt" ]
mps = [ "ort/coreml" ]
-
[profile.release]
# lto = true
strip = true
diff --git a/README.md b/README.md
index 5fa1ec4..2b07e7e 100644
--- a/README.md
+++ b/README.md
@@ -1,221 +1,143 @@
-usls
- Documentation
+usls
-**`usls`** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
+**usls** is a Rust library integrated with **ONNXRuntime**, offering a suite of advanced models for **Computer Vision** and **Vision-Language** tasks, including:
-- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLOv11](https://github.com/ultralytics/ultralytics)
+- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLO11](https://github.com/ultralytics/ultralytics)
- **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
-- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro)
-- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
+- **Vision Models**: [RT-DETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro), [FastViT](https://github.com/apple/ml-fastvit), [BEiT](https://github.com/microsoft/unilm/tree/master/beit), [MobileOne](https://github.com/apple/ml-mobileone)
+- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
+- **OCR Models**: [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947), [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
-Click to expand Supported Models
-
-## Supported Models
-
-| Model | Task / Type | Example | CUDA f32 | CUDA f16 | TensorRT f32 | TensorRT f16 |
-|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------|----------------------------|----------|----------|--------------|--------------|
-| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv11](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
-| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
-| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
-| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
-| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
-| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
-| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
-| [Depth-Anything v1 & v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
-| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
-| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Body Part Segmentation | [demo](examples/sapiens) | ✅ | ✅ | | |
-| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | | |
-| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | | |
-
-
+More Supported Models
+
+| Model | Task / Description | Example | CPU | CoreML | CUDA<br />FP32 | CUDA<br />FP16 | TensorRT<br />FP32 | TensorRT<br />FP16 |
+| -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------- | --- | ------ | -------------- | -------------- | ------------------ | ------------------ |
+| [BEiT](https://github.com/microsoft/unilm/tree/master/beit) | Image Classification | [demo](examples/beit) | ✅ | ✅ | | | | |
+| [ConvNeXt](https://github.com/facebookresearch/ConvNeXt) | Image Classification | [demo](examples/convnext) | ✅ | ✅ | | | | |
+| [FastViT](https://github.com/apple/ml-fastvit) | Image Classification | [demo](examples/fastvit) | ✅ | ✅ | | | | |
+| [MobileOne](https://github.com/apple/ml-mobileone) | Image Classification | [demo](examples/mobileone) | ✅ | ✅ | | | | |
+| [DeiT](https://github.com/facebookresearch/deit) | Image Classification | [demo](examples/deit) | ✅ | ✅ | | | | |
+| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision Embedding | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv5](https://github.com/ultralytics/yolov5) | Image Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Image Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLO11](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Image Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [RT-DETR](https://github.com/lyuwenyu/RT-DETR) | Object Detection | [demo](examples/rtdetr) | ✅ | ✅ | | | | |
+| [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.8/configs/picodet) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | | | | |
+| [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | | | | |
+| [D-FINE](https://github.com/Peterande/D-FINE) | Object Detection | [demo](examples/d-fine) | ✅ | ✅ | | | | |
+| [DEIM](https://github.com/ShihuaHuang95/DEIM) | Object Detection | [demo](examples/deim) | ✅ | ✅ | | | | |
+| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | |
+| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | |
+| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | |
+| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | |
+| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | |
+| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Open-Set Detection With Language | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | ✅ | ✅ | | |
+| [CLIP](https://github.com/openai/CLIP) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [BLIP](https://github.com/salesforce/BLIP) | Image Captioning | [demo](examples/blip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html) | Table Recognition | [demo](examples/slanet) | ✅ | ✅ | | | | |
+| [TrOCR](https://huggingface.co/microsoft/trocr-base-printed) | Text Recognition | [demo](examples/trocr) | ✅ | ✅ | | | | |
+| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [DepthAnything v1<br />DepthAnything v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
+| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | ✅ | ✅ | | |
+| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Foundation for Human Vision Models | [demo](examples/sapiens) | ✅ | ✅ | ✅ | ✅ | | |
+| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | ✅ | ✅ | | |
+## ⛳️ Cargo Features
-## ⛳️ ONNXRuntime Linking
+By default, none of the following features are enabled. You can enable them as needed (see the `Cargo.toml` snippet after this list):
-
-You have two options to link the ONNXRuntime library
+- `auto`: Automatically downloads prebuilt ONNXRuntime binaries from Pyke’s CDN for supported platforms.
-- ### Option 1: Manual Linking
+ - If disabled, you'll need to [compile `ONNXRuntime` from source](https://github.com/microsoft/onnxruntime) or [download a precompiled package](https://github.com/microsoft/onnxruntime/releases), and then [link it manually](https://ort.pyke.io/setup/linking).
- - #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking).
+
+ 👉 For Linux or macOS Users
- - #### For Linux or macOS Users:
- - Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
- - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
- ```shell
- export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
- ```
-
-- ### Option 2: Automatic Download
- Just use `--features auto`
- ```shell
- cargo run -r --example yolo --features auto
- ```
+ - Download from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
+ - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
+ ```shell
+ export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1
+ ```
-
+
+- `ffmpeg`: Adds support for video streams, real-time frame visualization, and video export.
+
+ - Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). For any issues related to `ffmpeg` features, please refer to the issues of these two crates.
+- `cuda`: Enables the NVIDIA CUDA provider.
+- `trt`: Enables the NVIDIA TensorRT provider.
+- `mps`: Enables the Apple CoreML provider.
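+
+To enable any of these in a downstream project, list them on the `usls` dependency. A minimal illustration (the version matches this repository's `Cargo.toml`; keep only the features you actually need):
+
+```Toml
+[dependencies]
+usls = { version = "0.0.21", features = [ "auto", "cuda" ] }
+```
+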
## 🎈 Demo
```Shell
-cargo run -r --example yolo # blip, clip, yolop, svtr, db, ...
+cargo run -r -F cuda --example svtr -- --device cuda
```
+All examples are located in the [examples](./examples/) directory.
+
## 🥂 Integrate Into Your Own Project
-- #### Add `usls` as a dependency to your project's `Cargo.toml`
- ```Shell
- cargo add usls
- ```
-
- Or use a specific commit:
- ```Toml
- [dependencies]
- usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
- ```
-
-- #### Follow the pipeline
- - Build model with the provided `models` and `ModelConfig`
- - Load images, video and stream with `DataLoader`
- - Do inference
- - Retrieve inference results from `Vec`
- - Annotate inference results with `Annotator`
- - Display images and write them to video with `Viewer`
-
-
-
- example code
-
- ```rust
- use usls::{models::YOLO, Annotator, DataLoader, Nms, ModelConfig, Vision, YOLOTask, YOLOVersion};
-
- fn main() -> anyhow::Result<()> {
- // Build model with ModelConfig
- let options = ModelConfig::new()
- .with_trt(0)
- .with_model("yolo/v8-m-dyn.onnx")?
- .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
- .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
- .with_ixx(0, 0, (1, 2, 4).into())
- .with_ixx(0, 2, (0, 640, 640).into())
- .with_ixx(0, 3, (0, 640, 640).into())
- .with_confs(&[0.2]);
- let mut model = YOLO::new(options)?;
-
- // Build DataLoader to load image(s), video, stream
- let dl = DataLoader::new(
- // "./assets/bus.jpg", // local image
- // "images/bus.jpg", // remote image
- // "../images-folder", // local images (from folder)
- // "../demo.mp4", // local video
- // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // online video
- "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
- )?
- .with_batch(2) // iterate with batch_size = 2
- .build()?;
-
- // Build annotator
- let annotator = Annotator::new()
- .with_bboxes_thickness(4)
- .with_saveout("YOLO-DataLoader");
-
- // Build viewer
- let mut viewer = Viewer::new().with_delay(10).with_scale(1.).resizable(true);
-
- // Run and annotate results
- for (xs, _) in dl {
- let ys = model.forward(&xs, false)?;
- // annotator.annotate(&xs, &ys);
- let images_plotted = annotator.plot(&xs, &ys, false)?;
-
- // show image
- viewer.imshow(&images_plotted)?;
-
- // check out window and key event
- if !viewer.is_open() || viewer.is_key_pressed(crate::Key::Escape) {
- break;
- }
-
- // write video
- viewer.write_batch(&images_plotted)?;
-
- // Retrieve inference results
- for y in ys {
- // bboxes
- if let Some(bboxes) = y.bboxes() {
- for bbox in bboxes {
- println!(
- "Bbox: {}, {}, {}, {}, {}, {}",
- bbox.xmin(),
- bbox.ymin(),
- bbox.xmax(),
- bbox.ymax(),
- bbox.confidence(),
- bbox.id(),
- );
- }
- }
- }
- }
-
- // finish video write
- viewer.finish_write()?;
-
- Ok(())
- }
- ```
-
-
-
+Add `usls` as a dependency to your project's `Cargo.toml`:
+
+```Shell
+cargo add usls
+```
+
+Or use a specific commit:
+
+```Toml
+[dependencies]
+usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
+```
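+
+Once the dependency is in place, a typical pipeline is: build a model, read inputs with `DataLoader`, run `forward`, then inspect or annotate the results. Below is a minimal sketch modeled on [examples/rtdetr](examples/rtdetr/main.rs) in this repository; the `todo!()` placeholder stands in for the model-options builder, which is not shown here and should be copied from the example.
+
+```rust
+use anyhow::Result;
+use usls::{models::RTDETR, Annotator, DataLoader};
+
+fn main() -> Result<()> {
+    // Build the model. The options builder is omitted on purpose:
+    // copy it from examples/rtdetr/main.rs (or any other example).
+    let options = todo!("build the model options as in examples/rtdetr/main.rs");
+    let mut model = RTDETR::new(options)?;
+
+    // Load an image.
+    let xs = [DataLoader::try_read("./assets/bus.jpg")?];
+
+    // Run inference.
+    let ys = model.forward(&xs)?;
+
+    // Inspect the detected bounding boxes.
+    for y in ys.iter() {
+        if let Some(bboxes) = y.bboxes() {
+            println!("[Bboxes]: Found {} objects", bboxes.len());
+            for (i, bbox) in bboxes.iter().enumerate() {
+                println!("{}: {:?}", i, bbox);
+            }
+        }
+    }
+
+    // Draw and save the annotated image.
+    let annotator = Annotator::default()
+        .with_bboxes_thickness(3)
+        .with_saveout(model.spec());
+    annotator.annotate(&xs, &ys);
+
+    Ok(())
+}
+```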
+
+## 🥳 If you find this helpful, please give it a star ⭐
## 📌 License
+
This project is licensed under [LICENSE](LICENSE).
diff --git a/examples/blip/README.md b/examples/blip/README.md
index 7b2161d..6121661 100644
--- a/examples/blip/README.md
+++ b/examples/blip/README.md
@@ -6,6 +6,8 @@ This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do condit
cargo run -r -F cuda --example blip -- --device cuda:0 --source images/dog.jpg --source ./assets/bus.jpg --source images/green-car.jpg
```
+## Results
+
```shell
Unconditional: Ys([Y { Texts: [Text("a dog running through a field of grass")] }, Y { Texts: [Text("a group of people walking around a bus")] }, Y { Texts: [Text("a green volkswagen beetle parked in front of a yellow building")] }])
Conditional: Ys([Y { Texts: [Text("this image depicting a dog running in a field")] }, Y { Texts: [Text("this image depict a bus in barcelona")] }, Y { Texts: [Text("this image depict a blue volkswagen beetle parked in a street in havana, cuba")] }])
diff --git a/examples/clip/README.md b/examples/clip/README.md
index 09ff510..71fe94e 100644
--- a/examples/clip/README.md
+++ b/examples/clip/README.md
@@ -12,5 +12,4 @@ cargo run -r -F cuda --example clip -- --device cuda:0
(99.9675%) ./examples/clip/images/carrot.jpg => Some carrots
(99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table
(100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant
-
-```
\ No newline at end of file
+```
diff --git a/examples/fastsam/README.md b/examples/fastsam/README.md
new file mode 100644
index 0000000..b2984e1
--- /dev/null
+++ b/examples/fastsam/README.md
@@ -0,0 +1,5 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example fastsam -- --device cuda
+```
diff --git a/examples/fastsam.rs b/examples/fastsam/main.rs
similarity index 89%
rename from examples/fastsam.rs
rename to examples/fastsam/main.rs
index 1b3bde8..c86c566 100644
--- a/examples/fastsam.rs
+++ b/examples/fastsam/main.rs
@@ -14,10 +14,6 @@ struct Args {
}
fn main() -> Result<()> {
- tracing_subscriber::fmt()
- .with_max_level(tracing::Level::INFO)
- .init();
-
let args: Args = argh::from_env();
// build model
@@ -40,7 +36,5 @@ fn main() -> Result<()> {
.with_saveout("fastsam");
annotator.annotate(&xs, &ys);
- model.summary();
-
Ok(())
}
diff --git a/examples/florence2/README.md b/examples/florence2/README.md
index 3764ea8..6078515 100644
--- a/examples/florence2/README.md
+++ b/examples/florence2/README.md
@@ -16,8 +16,7 @@ Task: Caption(2)
Ys([Y { Texts: [Text("The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is a light blue color with silver rims and appears to be in good condition. The building has a sloping roof and is painted in a bright yellow color. The sky is blue and there are trees in the background. The overall mood of the image is peaceful and serene.")] }, Y { Texts: [Text("The image shows a blue and white bus with the logo of the Brazilian football club, Cero Emisiones, on the side. The bus is parked on a street with a building in the background. There are several people walking on the sidewalk in front of the bus, some of them are carrying bags and one person is holding a camera. The sky is blue and there are trees and a traffic light visible in the top right corner of the image. The image appears to be taken during the day.")] }])
```
-
-# Tasks
+## Results
| Task | Demo |
| -----| ------|
diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md
index 6d061a5..711c097 100644
--- a/examples/rtdetr/README.md
+++ b/examples/rtdetr/README.md
@@ -1,11 +1,17 @@
## Quick Start
-
-**Models exported from [RT-DETR](https://github.com/lyuwenyu/RT-DETR)**
-
-
```shell
cargo run -r --example rtdetr
```
+## Results
+```
+[Bboxes]: Found 5 objects
+0: Bbox { xyxy: [47.969677, 397.81808, 246.22426, 904.8823], class_id: 0, name: Some("person"), confidence: 0.94432133 }
+1: Bbox { xyxy: [668.0796, 399.28854, 810.3779, 880.7412], class_id: 0, name: Some("person"), confidence: 0.93386495 }
+2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 }
+3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 }
+4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 }
+
+```
diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs
index 398f726..df5e14c 100644
--- a/examples/rtdetr/main.rs
+++ b/examples/rtdetr/main.rs
@@ -13,17 +13,26 @@ fn main() -> Result<()> {
let mut model = RTDETR::new(options)?;
// load
- let x = [DataLoader::try_read("./assets/bus.jpg")?];
+ let xs = [DataLoader::try_read("./assets/bus.jpg")?];
// run
- let y = model.forward(&x)?;
- println!("{:?}", y);
+ let ys = model.forward(&xs)?;
+
+ // extract bboxes
+ for y in ys.iter() {
+ if let Some(bboxes) = y.bboxes() {
+ println!("[Bboxes]: Found {} objects", bboxes.len());
+ for (i, bbox) in bboxes.iter().enumerate() {
+ println!("{}: {:?}", i, bbox)
+ }
+ }
+ }
// annotate
let annotator = Annotator::default()
.with_bboxes_thickness(3)
.with_saveout(model.spec());
- annotator.annotate(&x, &y);
+ annotator.annotate(&xs, &ys);
Ok(())
}
diff --git a/examples/sam/README.md b/examples/sam/README.md
index 6b85c99..34db1e3 100644
--- a/examples/sam/README.md
+++ b/examples/sam/README.md
@@ -15,7 +15,6 @@ cargo run -r -F cuda --example sam -- --device cuda --kind edge-sam
cargo run -r -F cuda --example sam -- --device cuda --kind sam-hq
```
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/sam/demo-car.png)
diff --git a/examples/sapiens/README.md b/examples/sapiens/README.md
index 3112e69..7699915 100644
--- a/examples/sapiens/README.md
+++ b/examples/sapiens/README.md
@@ -4,7 +4,6 @@
cargo run -r -F cuda --example sapiens -- --device cuda
```
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/sapiens/demo.png)
diff --git a/examples/slanet/README.md b/examples/slanet/README.md
index dac09f0..9ee499a 100644
--- a/examples/slanet/README.md
+++ b/examples/slanet/README.md
@@ -4,7 +4,6 @@
cargo run -r -F cuda --example slanet -- --device cuda
```
-
## Results
![](https://github.com/jamjamjon/assets/releases/download/slanet/demo.png)
diff --git a/examples/yolo/main.rs b/examples/yolo/main.rs
index fe042f5..cbee159 100644
--- a/examples/yolo/main.rs
+++ b/examples/yolo/main.rs
@@ -223,14 +223,14 @@ fn main() -> Result<()> {
for (xs, _paths) in dl {
let ys = model.forward(&xs)?;
// extract bboxes
- for y in ys.iter() {
- if let Some(bboxes) = y.bboxes() {
- println!("[Bboxes]: Found {} objects", bboxes.len());
- for (i, bbox) in bboxes.iter().enumerate() {
- println!("{}: {:?}", i, bbox)
- }
- }
- }
+ // for y in ys.iter() {
+ // if let Some(bboxes) = y.bboxes() {
+ // println!("[Bboxes]: Found {} objects", bboxes.len());
+ // for (i, bbox) in bboxes.iter().enumerate() {
+ // println!("{}: {:?}", i, bbox)
+ // }
+ // }
+ // }
// plot
annotator.annotate(&xs, &ys);
diff --git a/examples/yolov8-rtdetr/README.md b/examples/yolov8-rtdetr/README.md
new file mode 100644
index 0000000..78eabd8
--- /dev/null
+++ b/examples/yolov8-rtdetr/README.md
@@ -0,0 +1,9 @@
+## Quick Start
+
+```shell
+cargo run -r -F cuda --example yolov8-rtdetr -- --device cuda
+```
+
+## Results
+
+```shell
+Ys([Y { BBoxes: [Bbox { xyxy: [668.71356, 395.4159, 809.01587, 879.3043], class_id: 0, name: Some("person"), confidence: 0.950527 }, Bbox { xyxy: [48.866394, 399.50665, 248.22641, 904.7525], class_id: 0, name: Some("person"), confidence: 0.9504415 }, Bbox { xyxy: [20.197449, 230.00304, 805.026, 730.3445], class_id: 5, name: Some("bus"), confidence: 0.94705224 }, Bbox { xyxy: [221.3088, 405.65436, 345.44052, 860.2628], class_id: 0, name: Some("person"), confidence: 0.93062377 }, Bbox { xyxy: [0.34117508, 549.8391, 76.50758, 868.87646], class_id: 0, name: Some("person"), confidence: 0.71064234 }, Bbox { xyxy: [282.12543, 484.14166, 296.43207, 520.96246], class_id: 27, name: Some("tie"), confidence: 0.40305245 }] }])
+```
diff --git a/examples/yolov8-rtdetr.rs b/examples/yolov8-rtdetr/main.rs
similarity index 89%
rename from examples/yolov8-rtdetr.rs
rename to examples/yolov8-rtdetr/main.rs
index 9b87e71..1471b51 100644
--- a/examples/yolov8-rtdetr.rs
+++ b/examples/yolov8-rtdetr/main.rs
@@ -14,10 +14,6 @@ struct Args {
}
fn main() -> Result<()> {
- tracing_subscriber::fmt()
- .with_max_level(tracing::Level::INFO)
- .init();
-
let args: Args = argh::from_env();
// build model
@@ -40,7 +36,5 @@ fn main() -> Result<()> {
.with_saveout(model.spec());
annotator.annotate(&xs, &ys);
- model.summary();
-
Ok(())
}
diff --git a/src/models/dfine/README.md b/src/models/d_fine/README.md
similarity index 100%
rename from src/models/dfine/README.md
rename to src/models/d_fine/README.md
diff --git a/src/models/dfine/config.rs b/src/models/d_fine/config.rs
similarity index 100%
rename from src/models/dfine/config.rs
rename to src/models/d_fine/config.rs
diff --git a/src/models/dfine/mod.rs b/src/models/d_fine/mod.rs
similarity index 100%
rename from src/models/dfine/mod.rs
rename to src/models/d_fine/mod.rs
diff --git a/src/models/mod.rs b/src/models/mod.rs
index 1a57ac6..624c961 100644
--- a/src/models/mod.rs
+++ b/src/models/mod.rs
@@ -2,12 +2,12 @@ mod beit;
mod blip;
mod clip;
mod convnext;
+mod d_fine;
mod db;
mod deim;
mod deit;
mod depth_anything;
mod depth_pro;
-mod dfine;
mod dinov2;
mod fastvit;
mod florence2;