forked from alibaba/EasyCV
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dino_4sc_swinl.py
95 lines (95 loc) · 3.08 KB
/
dino_4sc_swinl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Model settings.
# A single nested mapping bound to `model`. Every sub-mapping that carries a
# 'type' entry names the component it configures (presumably resolved by the
# framework's registry -- confirm against the config loader).
model = {
    'type': 'Detection',
    # URL of the pretrained checkpoint.
    'pretrained':
    'https://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/EasyCV/modelzoo/classification/timm/swint/warpper_swin_large_patch4_window12_384_22k.pth',
    # Backbone: three stage indices are listed in out_indices.
    'backbone': {
        'type': 'SwinTransformer',
        'pretrain_img_size': 384,
        'embed_dim': 192,
        'depths': [2, 2, 18, 2],
        'num_heads': [6, 12, 24, 48],
        'window_size': 12,
        'out_indices': (1, 2, 3),
        'use_checkpoint': True,
    },
    # NOTE(review): num_queries, query_dim, num_patterns, random_refpoints_xy,
    # num_feature_levels, two_stage_type, two_stage_add_query_num and
    # decoder_sa_type are set both on the head and inside 'transformer' with
    # equal values -- presumably they must stay in sync; confirm before
    # changing only one of them.
    'head': {
        'type': 'DINOHead',
        'transformer': {
            'type': 'DeformableTransformer',
            'd_model': 256,
            'nhead': 8,
            'num_queries': 900,
            'num_encoder_layers': 6,
            'num_unicoder_layers': 0,
            'num_decoder_layers': 6,
            'dim_feedforward': 2048,
            'dropout': 0.0,
            'activation': 'relu',
            'normalize_before': False,
            'return_intermediate_dec': True,
            'query_dim': 4,
            'num_patterns': 0,
            'modulate_hw_attn': True,
            # Deformable encoder / decoder options.
            'deformable_encoder': True,
            'deformable_decoder': True,
            'num_feature_levels': 4,
            'enc_n_points': 4,
            'dec_n_points': 4,
            # Query initialisation.
            'decoder_query_perturber': None,
            'add_channel_attention': False,
            'random_refpoints_xy': False,
            # Two-stage options. Values listed by the original author:
            # ['no', 'standard', 'early', 'combine', 'enceachlayer',
            #  'enclayer1']
            'two_stage_type': 'standard',
            'two_stage_pat_embed': 0,
            'two_stage_add_query_num': 0,
            'two_stage_learn_wh': False,
            'two_stage_keep_all_tokens': False,
            # Evolution of the number of anchors.
            'dec_layer_number': None,
            'rm_dec_query_scale': True,
            'rm_self_attn_layers': None,
            'key_aware_type': None,
            # Layer sharing.
            'layer_share_type': None,
            # Detach handling.
            'rm_detach': None,
            'decoder_sa_type': 'sa',
            'module_seq': ['sa', 'ca', 'ffn'],
            # Denoising (dn) related.
            'embed_init_tgt': True,
            'use_detached_boxes_dec_out': False,
        },
        'dn_components': {
            'dn_number': 100,
            # The paper uses 0.5; the released code uses 0.25.
            'dn_label_noise_ratio': 0.5,
            'dn_box_noise_scale': 1.0,
            'dn_labelbook_size': 80,
        },
        'num_classes': 80,
        # NOTE(review): three channel widths are listed here while
        # num_feature_levels below is 4 -- presumably the extra level is
        # derived inside the head; confirm in DINOHead.
        'in_channels': [384, 768, 1536],
        'embed_dims': 256,
        'query_dim': 4,
        'num_queries': 900,
        'num_select': 300,
        'random_refpoints_xy': False,
        'num_patterns': 0,
        'fix_refpoints_hw': -1,
        'num_feature_levels': 4,
        # Two-stage options. Values listed by the original author:
        # ['no', 'standard']
        'two_stage_type': 'standard',
        'two_stage_add_query_num': 0,
        'dec_pred_class_embed_share': True,
        'dec_pred_bbox_embed_share': True,
        'two_stage_class_embed_share': False,
        'two_stage_bbox_embed_share': False,
        'decoder_sa_type': 'sa',
        'temperatureH': 20,
        'temperatureW': 20,
        # Cost terms (class / bbox / giou).
        'cost_dict': {
            'cost_class': 2,
            'cost_bbox': 5,
            'cost_giou': 2,
        },
        # Loss weights (ce / bbox / giou).
        'weight_dict': {
            'loss_ce': 1,
            'loss_bbox': 5,
            'loss_giou': 2,
        },
    },
}