diff --git a/Cargo.toml b/Cargo.toml index 15aa2e9..76cc2d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "usls" version = "0.0.21" +rust-version = "1.79" edition = "2021" description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models." repository = "https://github.com/jamjamjon/usls" @@ -9,7 +10,6 @@ license = "MIT" readme = "README.md" exclude = ["assets/*", "examples/*", "runs/*", "benches/*"] - [dependencies] aksr = { version = "0.0.2" } image = { version = "0.25.2" } @@ -22,7 +22,6 @@ rand = { version = "0.8.5" } chrono = { version = "0.4.30" } tokenizers = { version = "0.15.2" } log = { version = "0.4.22" } -env_logger = { version = "0.11.5" } indicatif = "0.17.8" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } @@ -42,19 +41,15 @@ video-rs = { version = "0.10.0", features = ["ndarray"], optional = true } minifb = { version = "0.27.0", optional = true } argh = "0.1.13" - [dev-dependencies] +env_logger = { version = "0.11.5" } tracing-subscriber = { version = "0.3.18" } tracing = { version = "0.1.40", features = ["log"] } - - [[example]] name = "viewer" required-features = ["ffmpeg"] - - [features] default = [ "ort/ndarray", @@ -68,7 +63,6 @@ cuda = [ "ort/cuda" ] trt = [ "ort/tensorrt" ] mps = [ "ort/coreml" ] - [profile.release] # lto = true strip = true diff --git a/README.md b/README.md index 5fa1ec4..2b07e7e 100644 --- a/README.md +++ b/README.md @@ -1,221 +1,143 @@ -
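With `env_logger` moved to `[dev-dependencies]` and the example diffs below dropping their `tracing_subscriber` setup, usls appears to emit records only through the `log` facade listed in its dependencies, so a downstream binary that wants to see them has to install its own logger. A minimal sketch, assuming you add `env_logger` to your own `Cargo.toml` (that dependency choice is an assumption, not part of this diff):

```rust
// Hypothetical downstream main.rs: install a logger so that records emitted by
// usls through the `log` crate become visible. Output is controlled by RUST_LOG,
// e.g. `RUST_LOG=info cargo run`.
fn main() {
    env_logger::init();

    // ... build and run usls models as usual ...
}
```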

[README header: the centered "usls" title is unchanged; the previous badge row (Documentation, ONNXRuntime Release Page, CUDA Toolkit Page, TensorRT Page, Crates Page, Crates.io Total Downloads) is replaced with badges for CI status, crates.io version, Rust / ONNXRuntime / CUDA / TensorRT MSRV, total downloads, examples, and documentation.]

-**`usls`** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including: +**usls** is a Rust library integrated with **ONNXRuntime**, offering a suite of advanced models for **Computer Vision** and **Vision-Language** tasks, including: -- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLOv11](https://github.com/ultralytics/ultralytics) +- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [YOLO11](https://github.com/ultralytics/ultralytics) - **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) -- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro) -- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242) +- **Vision Models**: [RT-DETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569), [DepthPro](https://github.com/apple/ml-depth-pro), [FastViT](https://github.com/apple/ml-fastvit), [BEiT](https://github.com/microsoft/unilm/tree/master/beit), [MobileOne](https://github.com/apple/ml-mobileone) +- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242) +- **OCR Models**: [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947), [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159), [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html), [TrOCR](https://huggingface.co/microsoft/trocr-base-printed), [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO)
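Since the full end-to-end example is dropped from the README further down in this diff, a minimal sketch of the shared pipeline may help orient readers. It is stitched together from code touched in this diff (the snippet removed from the README plus `examples/rtdetr` and `examples/yolo`), so the `ModelConfig` builder calls should be read as illustrative of that example code rather than as the definitive current API:

```rust
use usls::{models::YOLO, Annotator, DataLoader, ModelConfig};

fn main() -> anyhow::Result<()> {
    // Build a detector; these builder calls come from the example removed below.
    let options = ModelConfig::new()
        .with_model("yolo/v8-m-dyn.onnx")?
        .with_confs(&[0.2]);
    let mut model = YOLO::new(options)?;

    // Read a single image (DataLoader also handles folders, videos, and streams).
    let xs = [DataLoader::try_read("./assets/bus.jpg")?];

    // Run inference and inspect the detections.
    let ys = model.forward(&xs)?;
    for y in ys.iter() {
        if let Some(bboxes) = y.bboxes() {
            println!("[Bboxes]: Found {} objects", bboxes.len());
        }
    }

    // Draw the results and save them, as the bundled examples do.
    let annotator = Annotator::default()
        .with_bboxes_thickness(3)
        .with_saveout(model.spec());
    annotator.annotate(&xs, &ys);

    Ok(())
}
```

As the bundled examples suggest, swapping `YOLO` for any other model listed above follows the same read → forward → annotate shape; only the per-model options differ.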
-Click to expand Supported Models - -## Supported Models - -| Model | Task / Type | Example | CUDA f32 | CUDA f16 | TensorRT f32 | TensorRT f16 | -|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------|----------------------------|----------|----------|--------------|--------------| -| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification
Object Detection
Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection
Instance Segmentation
Classification
Oriented Object Detection
Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection
Instance Segmentation
Classification
Oriented Object Detection
Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [YOLOv11](https://github.com/ultralytics/ultralytics) | Object Detection
Instance Segmentation
Classification
Oriented Object Detection
Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | | -| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | | -| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | | -| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | | -| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | | -| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | -| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ | -| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ Visual
❌ Textual | ✅ Visual
❌ Textual | -| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ Visual
❌ Textual | ✅ Visual
❌ Textual | -| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ | -| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ | -| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ | -| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ | -| [Depth-Anything v1 & v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ | -| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ | -| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | | -| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Body Part Segmentation | [demo](examples/sapiens) | ✅ | ✅ | | | -| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | | | -| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | | | - - +More Supported Models + +| Model | Task / Description | Example | CPU | CoreML | CUDA
FP32 | CUDA
FP16 | TensorRT
FP32 | TensorRT
FP16 | +| -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------- | ---------------------------- | --- | ------ | -------------- | -------------- | ------------------ | ------------------ | +| [BEiT](https://github.com/microsoft/unilm/tree/master/beit) | Image Classification | [demo](examples/beit) | ✅ | ✅ | | | | | +| [ConvNeXt](https://github.com/facebookresearch/ConvNeXt) | Image Classification | [demo](examples/convnext) | ✅ | ✅ | | | | | +| [FastViT](https://github.com/apple/ml-fastvit) | Image Classification | [demo](examples/fastvit) | ✅ | ✅ | | | | | +| [MobileOne](https://github.com/apple/ml-mobileone) | Image Classification | [demo](examples/mobileone) | ✅ | ✅ | | | | | +| [DeiT](https://github.com/facebookresearch/deit) | Image Classification | [demo](examples/deit) | ✅ | ✅ | | | | | +| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision Embedding | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv5](https://github.com/ultralytics/yolov5) | Image Classification
Object Detection
Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection
Instance Segmentation
Image Classification
Oriented Object Detection
Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLOv11](https://github.com/ultralytics/ultralytics) | Object Detection
Instance Segmentation
Image Classification
Oriented Object Detection
Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [RT-DETR](https://github.com/lyuwenyu/RT-DETR) | Object Detection | [demo](examples/rtdetr) | ✅ | ✅ | | | | | +| [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.8/configs/picodet) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | | | | | +| [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO) | Object Detection | [demo](examples/picodet-layout) | ✅ | ✅ | | | | | +| [D-FINE](https://github.com/manhbd-22022602/D-FINE) | Object Detection | [demo](examples/d-fine) | ✅ | ✅ | | | | | +| [DEIM](https://github.com/ShihuaHuang95/DEIM) | Object Detection | [demo](examples/deim) | ✅ | ✅ | | | | | +| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | | +| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | | +| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | | +| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | | +| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | ✅ | ✅ | | | +| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Open-Set Detection With Language | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | ✅ | ✅ | | | +| [CLIP](https://github.com/openai/CLIP) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| [jina-clip-v1](https://huggingface.co/jinaai/jina-clip-v1) | Vision-Language Embedding | [demo](examples/clip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| [BLIP](https://github.com/salesforce/BLIP) | Image Captioning | [demo](examples/blip) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| [DB(PaddleOCR-Det)](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [SVTR(PaddleOCR-Rec)](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [SLANet](https://paddlepaddle.github.io/PaddleOCR/latest/algorithm/table_recognition/algorithm_table_slanet.html) | Tabel Recognition | [demo](examples/slanet) | ✅ | ✅ | | | | | +| [TrOCR](https://huggingface.co/microsoft/trocr-base-printed) | Text Recognition | [demo](examples/trocr) | ✅ | ✅ | | | | | +| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [DepthAnything v1
DepthAnything v2](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | +| [DepthPro](https://github.com/apple/ml-depth-pro) | Monocular Depth Estimation | [demo](examples/depth-pro) | ✅ | ✅ | ✅ | ✅ | | | +| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Sapiens](https://github.com/facebookresearch/sapiens/tree/main) | Foundation for Human Vision Models | [demo](examples/sapiens) | ✅ | ✅ | ✅ | ✅ | | | +| [Florence2](https://arxiv.org/abs/2311.06242) | a Variety of Vision Tasks | [demo](examples/florence2) | ✅ | ✅ | ✅ | ✅ | | |
+## ⛳️ Cargo Features -## ⛳️ ONNXRuntime Linking +By default, none of the following features are enabled. You can enable them as needed: -
-You have two options to link the ONNXRuntime library +- `auto`: Automatically downloads prebuilt ONNXRuntime binaries from Pyke’s CDN for supported platforms. -- ### Option 1: Manual Linking + - If disabled, you'll need to [compile `ONNXRuntime` from source](https://github.com/microsoft/onnxruntime) or [download a precompiled package](https://github.com/microsoft/onnxruntime/releases), and then [link it manually](https://ort.pyke.io/setup/linking). - - #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking). +
+ 👉 For Linux or macOS Users - - #### For Linux or macOS Users: - - Download the ONNX Runtime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases). - - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: - ```shell - export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0 - ``` - -- ### Option 2: Automatic Download - Just use `--features auto` - ```shell - cargo run -r --example yolo --features auto - ``` + - Download from the [Releases page](https://github.com/microsoft/onnxruntime/releases). + - Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: + ```shell + export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.20.1 + ``` -
+
+- `ffmpeg`: Adds support for video streams, real-time frame visualization, and video export.
+
+  - Powered by [video-rs](https://github.com/oddity-ai/video-rs) and [minifb](https://github.com/emoon/rust_minifb). For `ffmpeg`-related issues, please refer to those two crates' issue trackers.
+- `cuda`: Enables the NVIDIA CUDA provider.
+- `trt`: Enables the NVIDIA TensorRT provider.
+- `mps`: Enables the Apple CoreML provider.

 ## 🎈 Demo

 ```Shell
-cargo run -r --example yolo   # blip, clip, yolop, svtr, db, ...
+cargo run -r -F cuda --example svtr -- --device cuda
 ```

+All examples are located in the [examples](./examples/) directory.
+
 ## 🥂 Integrate Into Your Own Project

-- #### Add `usls` as a dependency to your project's `Cargo.toml`
-  ```Shell
-  cargo add usls
-  ```
-
-  Or use a specific commit:
-  ```Toml
-  [dependencies]
-  usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
-  ```
-
-- #### Follow the pipeline
-  - Build model with the provided `models` and `ModelConfig`
-  - Load images, video and stream with `DataLoader`
-  - Do inference
-  - Retrieve inference results from `Vec<Y>`
-  - Annotate inference results with `Annotator`
-  - Display images and write them to video with `Viewer`
-
-
- example code - - ```rust - use usls::{models::YOLO, Annotator, DataLoader, Nms, ModelConfig, Vision, YOLOTask, YOLOVersion}; - - fn main() -> anyhow::Result<()> { - // Build model with ModelConfig - let options = ModelConfig::new() - .with_trt(0) - .with_model("yolo/v8-m-dyn.onnx")? - .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR - .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb - .with_ixx(0, 0, (1, 2, 4).into()) - .with_ixx(0, 2, (0, 640, 640).into()) - .with_ixx(0, 3, (0, 640, 640).into()) - .with_confs(&[0.2]); - let mut model = YOLO::new(options)?; - - // Build DataLoader to load image(s), video, stream - let dl = DataLoader::new( - // "./assets/bus.jpg", // local image - // "images/bus.jpg", // remote image - // "../images-folder", // local images (from folder) - // "../demo.mp4", // local video - // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // online video - "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream - )? - .with_batch(2) // iterate with batch_size = 2 - .build()?; - - // Build annotator - let annotator = Annotator::new() - .with_bboxes_thickness(4) - .with_saveout("YOLO-DataLoader"); - - // Build viewer - let mut viewer = Viewer::new().with_delay(10).with_scale(1.).resizable(true); - - // Run and annotate results - for (xs, _) in dl { - let ys = model.forward(&xs, false)?; - // annotator.annotate(&xs, &ys); - let images_plotted = annotator.plot(&xs, &ys, false)?; - - // show image - viewer.imshow(&images_plotted)?; - - // check out window and key event - if !viewer.is_open() || viewer.is_key_pressed(crate::Key::Escape) { - break; - } - - // write video - viewer.write_batch(&images_plotted)?; - - // Retrieve inference results - for y in ys { - // bboxes - if let Some(bboxes) = y.bboxes() { - for bbox in bboxes { - println!( - "Bbox: {}, {}, {}, {}, {}, {}", - bbox.xmin(), - bbox.ymin(), - bbox.xmax(), - bbox.ymax(), - bbox.confidence(), - bbox.id(), - ); - } - } - } - } - - // finish video write - viewer.finish_write()?; - - Ok(()) - } - ``` - -
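Since the rewritten section below only shows `cargo add usls` and a git pin, it may be worth spelling out how the Cargo features documented earlier are switched on from a downstream manifest. A sketch, with the feature names taken from this diff's README and `Cargo.toml` and the version from its `[package]` table:

```Toml
[dependencies]
# "auto" pulls prebuilt ONNXRuntime binaries; "cuda" enables the CUDA execution
# provider. "trt", "mps", and "ffmpeg" are enabled the same way.
usls = { version = "0.0.21", features = ["auto", "cuda"] }
```

The same selection also works on the command line, e.g. `cargo add usls --features auto,cuda`.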
-
+Add `usls` as a dependency to your project's `Cargo.toml` + +```Shell +cargo add usls +``` + +Or use a specific commit: + +```Toml +[dependencies] +usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" } +``` + +## 🥳 If you find this helpful, please give it a star ⭐ ## 📌 License + This project is licensed under [LICENSE](LICENSE). diff --git a/examples/blip/README.md b/examples/blip/README.md index 7b2161d..6121661 100644 --- a/examples/blip/README.md +++ b/examples/blip/README.md @@ -6,6 +6,8 @@ This demo shows how to use [BLIP](https://arxiv.org/abs/2201.12086) to do condit cargo run -r -F cuda --example blip -- --device cuda:0 --source images/dog.jpg --source ./assets/bus.jpg --source images/green-car.jpg ``` +## Results + ```shell Unconditional: Ys([Y { Texts: [Text("a dog running through a field of grass")] }, Y { Texts: [Text("a group of people walking around a bus")] }, Y { Texts: [Text("a green volkswagen beetle parked in front of a yellow building")] }]) Conditional: Ys([Y { Texts: [Text("this image depicting a dog running in a field")] }, Y { Texts: [Text("this image depict a bus in barcelona")] }, Y { Texts: [Text("this image depict a blue volkswagen beetle parked in a street in havana, cuba")] }]) diff --git a/examples/clip/README.md b/examples/clip/README.md index 09ff510..71fe94e 100644 --- a/examples/clip/README.md +++ b/examples/clip/README.md @@ -12,5 +12,4 @@ cargo run -r -F cuda --example clip -- --device cuda:0 (99.9675%) ./examples/clip/images/carrot.jpg => Some carrots (99.93718%) ./examples/clip/images/doll.jpg => There is a doll with red hair and a clock on a table (100.0%) ./examples/clip/images/drink.jpg => Some people holding wine glasses in a restaurant - -``` \ No newline at end of file +``` diff --git a/examples/fastsam/README.md b/examples/fastsam/README.md new file mode 100644 index 0000000..b2984e1 --- /dev/null +++ b/examples/fastsam/README.md @@ -0,0 +1,5 @@ +## Quick Start + +```shell +cargo run -r -F cuda --example fastsam -- --device cuda +``` diff --git a/examples/fastsam.rs b/examples/fastsam/main.rs similarity index 89% rename from examples/fastsam.rs rename to examples/fastsam/main.rs index 1b3bde8..c86c566 100644 --- a/examples/fastsam.rs +++ b/examples/fastsam/main.rs @@ -14,10 +14,6 @@ struct Args { } fn main() -> Result<()> { - tracing_subscriber::fmt() - .with_max_level(tracing::Level::INFO) - .init(); - let args: Args = argh::from_env(); // build model @@ -40,7 +36,5 @@ fn main() -> Result<()> { .with_saveout("fastsam"); annotator.annotate(&xs, &ys); - model.summary(); - Ok(()) } diff --git a/examples/florence2/README.md b/examples/florence2/README.md index 3764ea8..6078515 100644 --- a/examples/florence2/README.md +++ b/examples/florence2/README.md @@ -16,8 +16,7 @@ Task: Caption(2) Ys([Y { Texts: [Text("The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is a light blue color with silver rims and appears to be in good condition. The building has a sloping roof and is painted in a bright yellow color. The sky is blue and there are trees in the background. The overall mood of the image is peaceful and serene.")] }, Y { Texts: [Text("The image shows a blue and white bus with the logo of the Brazilian football club, Cero Emisiones, on the side. The bus is parked on a street with a building in the background. 
There are several people walking on the sidewalk in front of the bus, some of them are carrying bags and one person is holding a camera. The sky is blue and there are trees and a traffic light visible in the top right corner of the image. The image appears to be taken during the day.")] }]) ``` - -# Tasks +## Results | Task | Demo | | -----| ------| diff --git a/examples/rtdetr/README.md b/examples/rtdetr/README.md index 6d061a5..711c097 100644 --- a/examples/rtdetr/README.md +++ b/examples/rtdetr/README.md @@ -1,11 +1,17 @@ ## Quick Start - -**Models exported from [RT-DETR](https://github.com/lyuwenyu/RT-DETR)** - - ```shell cargo run -r --example rtdetr ``` +## Results +``` +[Bboxes]: Found 5 objects +0: Bbox { xyxy: [47.969677, 397.81808, 246.22426, 904.8823], class_id: 0, name: Some("person"), confidence: 0.94432133 } +1: Bbox { xyxy: [668.0796, 399.28854, 810.3779, 880.7412], class_id: 0, name: Some("person"), confidence: 0.93386495 } +2: Bbox { xyxy: [20.852705, 229.30482, 807.43494, 729.51196], class_id: 5, name: Some("bus"), confidence: 0.9319465 } +3: Bbox { xyxy: [223.28226, 405.37265, 343.92603, 859.50366], class_id: 0, name: Some("person"), confidence: 0.9130827 } +4: Bbox { xyxy: [0.0, 552.6165, 65.99908, 868.00525], class_id: 0, name: Some("person"), confidence: 0.7910869 } + +``` diff --git a/examples/rtdetr/main.rs b/examples/rtdetr/main.rs index 398f726..df5e14c 100644 --- a/examples/rtdetr/main.rs +++ b/examples/rtdetr/main.rs @@ -13,17 +13,26 @@ fn main() -> Result<()> { let mut model = RTDETR::new(options)?; // load - let x = [DataLoader::try_read("./assets/bus.jpg")?]; + let xs = [DataLoader::try_read("./assets/bus.jpg")?]; // run - let y = model.forward(&x)?; - println!("{:?}", y); + let ys = model.forward(&xs)?; + + // extract bboxes + for y in ys.iter() { + if let Some(bboxes) = y.bboxes() { + println!("[Bboxes]: Found {} objects", bboxes.len()); + for (i, bbox) in bboxes.iter().enumerate() { + println!("{}: {:?}", i, bbox) + } + } + } // annotate let annotator = Annotator::default() .with_bboxes_thickness(3) .with_saveout(model.spec()); - annotator.annotate(&x, &y); + annotator.annotate(&xs, &ys); Ok(()) } diff --git a/examples/sam/README.md b/examples/sam/README.md index 6b85c99..34db1e3 100644 --- a/examples/sam/README.md +++ b/examples/sam/README.md @@ -15,7 +15,6 @@ cargo run -r -F cuda --example sam -- --device cuda --kind edge-sam cargo run -r -F cuda --example sam -- --device cuda --kind sam-hq ``` - ## Results ![](https://github.com/jamjamjon/assets/releases/download/sam/demo-car.png) diff --git a/examples/sapiens/README.md b/examples/sapiens/README.md index 3112e69..7699915 100644 --- a/examples/sapiens/README.md +++ b/examples/sapiens/README.md @@ -4,7 +4,6 @@ cargo run -r -F cuda --example sapiens -- --device cuda ``` - ## Results ![](https://github.com/jamjamjon/assets/releases/download/sapiens/demo.png) diff --git a/examples/slanet/README.md b/examples/slanet/README.md index dac09f0..9ee499a 100644 --- a/examples/slanet/README.md +++ b/examples/slanet/README.md @@ -4,7 +4,6 @@ cargo run -r -F cuda --example slanet -- --device cuda ``` - ## Results ![](https://github.com/jamjamjon/assets/releases/download/slanet/demo.png) diff --git a/examples/yolo/main.rs b/examples/yolo/main.rs index fe042f5..cbee159 100644 --- a/examples/yolo/main.rs +++ b/examples/yolo/main.rs @@ -223,14 +223,14 @@ fn main() -> Result<()> { for (xs, _paths) in dl { let ys = model.forward(&xs)?; // extract bboxes - for y in ys.iter() { - if let Some(bboxes) = y.bboxes() { - 
println!("[Bboxes]: Found {} objects", bboxes.len()); - for (i, bbox) in bboxes.iter().enumerate() { - println!("{}: {:?}", i, bbox) - } - } - } + // for y in ys.iter() { + // if let Some(bboxes) = y.bboxes() { + // println!("[Bboxes]: Found {} objects", bboxes.len()); + // for (i, bbox) in bboxes.iter().enumerate() { + // println!("{}: {:?}", i, bbox) + // } + // } + // } // plot annotator.annotate(&xs, &ys); diff --git a/examples/yolov8-rtdetr/README.md b/examples/yolov8-rtdetr/README.md new file mode 100644 index 0000000..78eabd8 --- /dev/null +++ b/examples/yolov8-rtdetr/README.md @@ -0,0 +1,9 @@ +## Quick Start + +```shell +cargo run -r -F cuda --example yolov8-rtdetr -- --device cuda +``` + +```shell +Ys([Y { BBoxes: [Bbox { xyxy: [668.71356, 395.4159, 809.01587, 879.3043], class_id: 0, name: Some("person"), confidence: 0.950527 }, Bbox { xyxy: [48.866394, 399.50665, 248.22641, 904.7525], class_id: 0, name: Some("person"), confidence: 0.9504415 }, Bbox { xyxy: [20.197449, 230.00304, 805.026, 730.3445], class_id: 5, name: Some("bus"), confidence: 0.94705224 }, Bbox { xyxy: [221.3088, 405.65436, 345.44052, 860.2628], class_id: 0, name: Some("person"), confidence: 0.93062377 }, Bbox { xyxy: [0.34117508, 549.8391, 76.50758, 868.87646], class_id: 0, name: Some("person"), confidence: 0.71064234 }, Bbox { xyxy: [282.12543, 484.14166, 296.43207, 520.96246], class_id: 27, name: Some("tie"), confidence: 0.40305245 }] }]) +``` diff --git a/examples/yolov8-rtdetr.rs b/examples/yolov8-rtdetr/main.rs similarity index 89% rename from examples/yolov8-rtdetr.rs rename to examples/yolov8-rtdetr/main.rs index 9b87e71..1471b51 100644 --- a/examples/yolov8-rtdetr.rs +++ b/examples/yolov8-rtdetr/main.rs @@ -14,10 +14,6 @@ struct Args { } fn main() -> Result<()> { - tracing_subscriber::fmt() - .with_max_level(tracing::Level::INFO) - .init(); - let args: Args = argh::from_env(); // build model @@ -40,7 +36,5 @@ fn main() -> Result<()> { .with_saveout(model.spec()); annotator.annotate(&xs, &ys); - model.summary(); - Ok(()) } diff --git a/src/models/dfine/README.md b/src/models/d_fine/README.md similarity index 100% rename from src/models/dfine/README.md rename to src/models/d_fine/README.md diff --git a/src/models/dfine/config.rs b/src/models/d_fine/config.rs similarity index 100% rename from src/models/dfine/config.rs rename to src/models/d_fine/config.rs diff --git a/src/models/dfine/mod.rs b/src/models/d_fine/mod.rs similarity index 100% rename from src/models/dfine/mod.rs rename to src/models/d_fine/mod.rs diff --git a/src/models/mod.rs b/src/models/mod.rs index 1a57ac6..624c961 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -2,12 +2,12 @@ mod beit; mod blip; mod clip; mod convnext; +mod d_fine; mod db; mod deim; mod deit; mod depth_anything; mod depth_pro; -mod dfine; mod dinov2; mod fastvit; mod florence2;