-
Notifications
You must be signed in to change notification settings - Fork 0
/
archs_3.txt
116 lines (116 loc) · 102 KB
/
archs_3.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
Archs in range 30-40M params
Params: 39.43306 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 40-50M params
Params: 49.38634 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.745892 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.269604 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.371108 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.572388 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.269604 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.66282 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.882532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.715172 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.8641 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.520036 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 45.417316 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.154596 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.517092 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.303524 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.807332 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.566244 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.653732 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.75498 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.656676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.876388 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.876388 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.89482 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.748836 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.730404 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.59082 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.75818 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.027044 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.027044 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.49546 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.711972 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.407972 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 47.69674 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.114532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.163684 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.705828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 45.36202 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 46.553956 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 49.742692 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 48.139108 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 50-60M params
Params: 58.75594 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.086756 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.634532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.997028 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.768228 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 52.132708 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.934628 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.934628 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 55.610212 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.232484 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 54.45514 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 55.972708 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 55.28458 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 52.906852 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.674852 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.631844 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.835812 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 52.0897 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.792804 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.8609 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.680996 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.762084 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.18602 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 51.38314 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 52.046692 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.644132 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.941732 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.25706 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.03146 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.000228 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 53.263204 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.013028 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 55.984996 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 52.464484 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 50.95306 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 56.857444 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.990884 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 54.498148 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 56.058724 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 55.22314 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 60-70M params
Params: 61.133668 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.32266 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 68.334436 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.761828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.344036 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.929188 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.712676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.365668 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 64.666468 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.371812 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.572836 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.145956 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.170532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.3809 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.420964 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.127524 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.1521 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.533028 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.35018 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.176676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.28874 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.062884 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.170532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.368612 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.457828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 256, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.54826 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '2': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.907812 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.496164 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 512, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 65.182564 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.009316 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 512, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 256, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 512, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 70-90M params