-
Notifications
You must be signed in to change notification settings - Fork 26
/
fastervqa-mt.yml
119 lines (101 loc) · 3.03 KB
/
fastervqa-mt.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Experiment identifier.
name: FasterVQA-MT

# Schedule, expressed in epochs (warmup_epochs is fractional).
# NOTE(review): the meaning of l_num_epochs is not evident from this file;
# confirm against the training script that reads it.
num_epochs: 30
l_num_epochs: 0
warmup_epochs: 2.5

# Boolean switches.
# NOTE(review): `ema` presumably enables an exponential moving average of the
# weights; confirm against the consumer.
ema: true
save_model: true

# Data-loading sizes.
batch_size: 16
num_workers: 6
# Experiment-tracking settings.
# NOTE(review): project_name is assumed to be a child of wandb; written flat,
# `wandb` would be null and project_name a separate top-level key.
wandb:
  project_name: VQA_New_Exp
# Datasets, keyed by split name. All four entries use type FusionDataset with
# `phase: test` and carry the same fragment-sampling values; they differ only
# in anno_file and data_prefix.
# NOTE(review): the nesting below (split -> args -> sample_types -> fragments)
# is reconstructed; written flat, the repeated keys would collide and only the
# last entry would survive. Confirm against the dataset loader.
# Trailing `# N` comments are carried over from the original file; presumably
# alternative values the author tried. Confirm before relying on them.
data:
  val-kv1k:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/KoNViD/labels.txt
      data_prefix: ../datasets/KoNViD/
      sample_types:
        fragments:
          fragments_h: 7
          fragments_w: 7
          fsize_h: 32
          fsize_w: 32
          aligned: 8
          clip_len: 16
          frame_interval: 2
          t_frag: 4
          num_clips: 1

  val-livevqc:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LIVE_VQC/labels.txt
      data_prefix: ../datasets/LIVE_VQC/
      sample_types:
        fragments:
          fragments_h: 7  # 7
          fragments_w: 7  # 7
          fsize_h: 32
          fsize_w: 32
          aligned: 8
          clip_len: 16  # 32
          frame_interval: 2
          t_frag: 4  # 8
          num_clips: 1

  val-l1080p:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LSVQ/labels_1080p.txt
      data_prefix: ../datasets/LSVQ/
      sample_types:
        # Disabled alternative sample type, kept from the original file.
        # resize:
        #   size_h: 224
        #   size_w: 224
        fragments:
          fragments_h: 7  # 7
          fragments_w: 7  # 7
          fsize_h: 32
          fsize_w: 32
          aligned: 8
          clip_len: 16  # 32
          frame_interval: 2
          t_frag: 4  # 8
          num_clips: 1

  val-ltest:
    type: FusionDataset
    args:
      phase: test
      anno_file: ./examplar_data_labels/LSVQ/labels_test.txt
      data_prefix: ../datasets/LSVQ/
      sample_types:
        fragments:
          fragments_h: 7  # 7
          fragments_w: 7  # 7
          fsize_h: 32
          fsize_w: 32
          aligned: 8
          clip_len: 16
          frame_interval: 2
          t_frag: 4
          num_clips: 1
# Model definition: an evaluator type plus its constructor-style arguments.
# NOTE(review): the nesting is reconstructed. `checkpoint` and `pretrained`
# are assumed to belong to backbone.fragments, and backbone_size,
# backbone_preserve_keys, divide_head and vqa_head are assumed to be direct
# children of args. Confirm against the evaluator's signature.
model:
  type: DiViDeAddEvaluator
  args:
    backbone:
      fragments:
        checkpoint: false
        # Was a bare `pretrained:` (which loads as null); spelled out so the
        # empty value is visibly intentional.
        pretrained: null
    backbone_size: swin_tiny_grpb
    backbone_preserve_keys: fragments
    divide_head: false
    vqa_head:
      in_channels: 768
      hidden_channels: 64
# Optimizer hyper-parameters.
# NOTE(review): lr, backbone_lr_mult and wd are assumed to be children of
# optimizer; written flat, `optimizer` would be null.
optimizer:
  # The explicit !!float tag is kept: exponent notation without a decimal
  # point (1e-3) is read as a string by YAML 1.1 loaders such as PyYAML.
  lr: !!float 1e-3
  backbone_lr_mult: !!float 1e-1
  wd: 0.05
# Weight files, given as relative paths.
load_path: '../pretrained/swin_tiny_patch244_window877_kinetics400_1k.pth'
# Quoted so the `*` is unmistakably literal text rather than YAML syntax.
# NOTE(review): whether the consumer treats `1*1` as a literal file name or a
# glob is not visible here; confirm.
test_load_path: './pretrained_weights/FAST_VQA_3D_1*1.pth'