Skip to content

Commit

Permalink
feat: Implement CountGD multi-modal counting app, with focus on small…
Browse files Browse the repository at this point in the history
… object detection

Implements the CountGD application as described in Amini-Naieni et al. (NeurIPS 2024).https://github.com/niki-amini-naieni/CountGD
  • Loading branch information
healthonrails committed Dec 23, 2024
1 parent ff8a3cd commit 3debde1
Show file tree
Hide file tree
Showing 105 changed files with 80,763 additions and 10 deletions.
45 changes: 45 additions & 0 deletions annolid/detector/countgd/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CUDA 12.1 + cuDNN 8 development image, pinned by digest for reproducibility.
# NOTE(review): the tag names a UBI8 (RHEL-family) variant, yet the install
# step below uses apt-get with Debian/Ubuntu package names. When both a tag and
# a digest are given the digest is what gets pulled, so confirm which
# distribution this digest resolves to and make the tag agree with it.
FROM docker.io/nvidia/cuda:12.1.0-cudnn8-devel-ubi8@sha256:f045009cab64c9fda6113b4473ac1c57dfcca65e18ce981bce63f3cddf7b807a

# Set the working directory in the container
WORKDIR /usr/src/app

# Install the compiler toolchain, media/X11 libraries and a Python interpreter.
# - The CUDA base image is not a Python image, so python3/pip (and the headers
#   needed to compile the GroundingDINO ops extension) are installed here;
#   python-is-python3 provides the `python` command used below.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package prompts cannot stall
#   the build and the setting does not leak into the runtime environment.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        gcc-11 \
        git \
        libsm6 \
        libxext6 \
        python-is-python3 \
        python3 \
        python3-dev \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set environment variable to use gcc-11
ENV CC=/usr/bin/gcc-11

# Make Gradio listen on all interfaces so the port exposed below is reachable
# from outside the container (an explicit server_name/server_port passed to
# launch() in app.py still takes precedence).
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Install Python dependencies before copying the full source tree, so this
# layer is reused from cache when only application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Install Gradio, pinned to the sdk_version declared in this app's README.md
RUN pip install --no-cache-dir gradio==4.44.1

# Copy the current directory contents into the container
COPY . .

# Set the working directory for the GroundingDINO ops
WORKDIR /usr/src/app/models/GroundingDINO/ops

# Build and install the custom ops, then run their self-test.
# NOTE(review): these two steps presumably need a CUDA device visible during
# `docker build` — confirm the build host exposes a GPU to the builder.
RUN python setup.py build install
# This should result in 6 lines of * True
RUN python test.py

# Change back to the original working directory
WORKDIR /usr/src/app

# Expose the port Gradio will run on
EXPOSE 7860

# Default command to run the Gradio app
CMD ["python", "app.py"]
15 changes: 15 additions & 0 deletions annolid/detector/countgd/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Original CountGD repository: https://github.com/niki-amini-naieni/CountGD/tree/main

Hugging Face Space: https://huggingface.co/spaces/nikigoli/countgd
```bibtex
@InProceedings{AminiNaieni24,
author = "Amini-Naieni, N. and Han, T. and Zisserman, A.",
title = "CountGD: Multi-Modal Open-World Counting",
booktitle = "Advances in Neural Information Processing Systems (NeurIPS)",
year = "2024",
}
```
---
title: CountGD_Multi-Modal_Open-World_Counting
app_file: app.py
sdk: gradio
sdk_version: 4.44.1
---
To start the app in the background:

```bash
nohup python -u app.py &
```
Empty file.
118 changes: 118 additions & 0 deletions annolid/detector/countgd/cfg_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Flat configuration module for the CountGD app: every setting is a plain
# module-level assignment. Section headings below group settings by name only.

# --- Data augmentation (data_aug_*) and batching ---
data_aug_scales = [
    480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800,
]
data_aug_max_size = 1333
data_aug_scales2_resize = [400, 500, 600]
data_aug_scales2_crop = [384, 600]
data_aug_scale_overlap = None
batch_size = 4

# --- Model identity, backbone and positional embedding ---
modelname = "groundingdino"
backbone = "swin_B_384_22k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]

# --- Transformer encoder/decoder dimensions ---
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4

# --- Two-stage and embedding-sharing switches ---
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True

# --- dn_* settings ---
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 91

# --- Text encoder and text/image fusion ---
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = False
use_transformer_ckpt = False
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
max_labels = 90  # pos + neg

# --- Optimisation: learning rates, freezing, schedule ---
lr = 1e-4  # base learning rate
backbone_freeze_keywords = None  # only for gdino backbone
# for whole model, e.g. ['backbone.0', 'bert'] for freeze visual encoder and text encoder
freeze_keywords = ["backbone.0", "bert"]
lr_backbone = 1e-5  # specific learning rate
lr_backbone_names = ["backbone.0", "bert"]
lr_linear_proj_mult = 1e-5
lr_linear_proj_names = ["ref_point_head", "sampling_offsets"]
weight_decay = 1e-4
param_dict_type = "ddetr_in_mmdet"
ddetr_lr_param = False
epochs = 30
lr_drop = 10
save_checkpoint_interval = 10
clip_max_norm = 0.1
onecyclelr = False
multi_step_lr = False
lr_drop_list = [10, 20]
frozen_weights = None

# --- Architecture variant flags ---
dilation = False
pdetr3_bbox_embed_diff_each_layer = False
pdetr3_refHW = -1
random_refpoints_xy = False
fix_refpoints_hw = -1
dabdetr_yolo_like_anchor_update = False
dabdetr_deformable_encoder = False
dabdetr_deformable_decoder = False
use_deformable_box_attn = False
box_attn_type = "roi_align"
dec_layer_number = None
decoder_layer_noise = False
dln_xy_noise = 0.2
dln_hw_noise = 0.2
add_channel_attention = False
add_pos_value = False
two_stage_pat_embed = 0
two_stage_add_query_num = 0
two_stage_learn_wh = False
two_stage_default_hw = 0.05
two_stage_keep_all_tokens = False
num_select = 900
batch_norm_type = "FrozenBatchNorm2d"
masks = False

# --- Matching costs and loss coefficients ---
aux_loss = True
set_cost_class = 5.0
set_cost_bbox = 1.0
set_cost_giou = 0.0
cls_loss_coef = 5.0
bbox_loss_coef = 1.0
giou_loss_coef = 0.0
enc_loss_coef = 1.0
interm_loss_coef = 1.0
no_interm_box_loss = False
mask_loss_coef = 1.0
dice_loss_coef = 1.0
focal_alpha = 0.25
focal_gamma = 2.0

# --- Decoder composition and matcher ---
decoder_sa_type = "sa"
matcher_type = "HungarianMatcher"
decoder_module_seq = ["sa", "ca", "ffn"]
nms_iou_threshold = -1
dec_pred_class_embed_share = True
match_unstable_error = True
use_detached_boxes_dec_out = False
dn_scalar = 100

# --- Thresholds and evaluation ---
box_threshold = 0.23
text_threshold = 0
use_coco_eval = False

# --- Label vocabularies ---
label_list = [
    "alcohol bottle", "baguette roll", "ball", "banana", "bead", "bee",
    "birthday candle", "biscuit", "boat", "bottle", "bowl", "box",
    "bread roll", "brick", "buffalo", "bun", "calamari ring", "can",
    "candle", "cap", "car", "cartridge", "cassette", "cement bag",
    "cereal", "chewing gum piece", "chopstick", "clam", "coffee bean", "coin",
    "cotton ball", "cow", "crane", "crayon", "croissant", "crow",
    "cup", "cupcake", "cupcake holder", "fish", "gemstone", "go game piece",
    "goat", "goldfish snack", "goose", "ice cream", "ice cream cone", "instant noodle",
    "jade stone", "jeans", "kidney bean", "kitchen towel", "lighter", "lipstick",
    "m&m piece", "macaron", "match", "meat skewer", "mini blind", "mosaic tile",
    "naan bread", "nail", "nut", "onion ring", "orange", "pearl",
    "pen", "pencil", "penguin", "pepper", "person", "pigeon",
    "plate", "polka dot tile", "potato", "rice bag", "roof tile", "screw",
    "shoe", "spoon", "spring roll", "stair", "stapler pin", "straw",
    "supermarket shelf", "swan", "tomato", "watermelon", "window", "zebra",
]
val_label_list = [
    "apple", "candy piece", "carrom board piece", "cashew nut", "comic book",
    "crab cake", "deer", "egg", "elephant", "finger food",
    "green pea", "hot air balloon", "keyboard key", "lego", "marble",
    "marker", "nail polish", "potato chip", "red bean", "round dessert",
    "sauce bottle", "sea shell", "sheep", "ski", "stamp",
    "sticky note", "strawberry", "sunglasses", "tree log", "watch",
]
1 change: 1 addition & 0 deletions annolid/detector/countgd/checkpoints/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Please download the checkpoint files from https://huggingface.co/spaces/nikigoli/countgd/tree/main/checkpoints and put them in this folder.
26 changes: 26 additions & 0 deletions annolid/detector/countgd/checkpoints/bert-base-uncased/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"_name_or_path": "bert-base-uncased",
"architectures": [
"BertModel"
],
"attention_probs_dropout_prob": 0.1,
"classifier_dropout": null,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-12,
"max_position_embeddings": 512,
"model_type": "bert",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"position_embedding_type": "absolute",
"torch_dtype": "float32",
"transformers_version": "4.39.1",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"cls_token": "[CLS]",
"mask_token": "[MASK]",
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"unk_token": "[UNK]"
}
Loading

0 comments on commit 3debde1

Please sign in to comment.