-
Notifications
You must be signed in to change notification settings - Fork 0
/
archs_2.txt
80 lines (80 loc) · 68 KB
/
archs_2.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
Now you should run one of the following depending on your shell
source /share/apps/python/miniconda4.12/etc/profile.d/conda.sh
source /share/apps/python/miniconda4.12/etc/profile.d/conda.csh
Archs in range 50-60M params
Params: 59.689828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.671396 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.302756 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 57.736036 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.302756 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.51018 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.284324 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.104676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.897252 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 57.717604 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.104676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.915684 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 57.330532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.104676 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.671396 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.123108 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 57.330532 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.51018 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.284324 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 56.925028 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 57.717604 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.51018 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.915684 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.689828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.689828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 54.565732 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.897252 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.87882 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.491748 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.51018 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.70826 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.284324 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.897252 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.897252 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.671396 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.284324 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 59.689828 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 58.897252 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 56.537956 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 56.150884 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 60-70M params
Params: 60.058468 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.482404 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 66.380644 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 65.97514 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.049124 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.869476 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.228772 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 67.965796 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 64.40842 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.454628 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.482404 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.454628 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.0769 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.869476 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 63.228772 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 61.64362 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.887908 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 64.813924 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.869476 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 66.380644 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.482404 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 60.463972 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 64.021348 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 65.200996 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 64.426852 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.049124 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 67.17322 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.436196 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 62.049124 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 65.993572 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Archs in range 70-90M params
Params: 70.712164 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 70.712164 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 71.891812 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}
Params: 70.712164 Arch: {'0': {'atten_out': 768, 'inter_hidden': 3072, 'residual_hidden': [1020]}, '1': {'atten_out': 768, 'inter_hidden': 1020, 'residual_hidden': [1020]}, '2': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '3': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '4': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '5': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, '6': {'atten_out': 768, 'inter_hidden': 768, 'residual_hidden': [1020]}, '7': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '8': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '9': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '10': {'atten_out': 768, 'inter_hidden': 2304, 'residual_hidden': [1020]}, '11': {'atten_out': 768, 'inter_hidden': 1536, 'residual_hidden': [1020]}, 'remove_layer_idx': []}