bert_pretrained_from_preprocessed_config.yaml
# Copyright (c) 2021 Peptone.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# BERT Pretraining from Preprocessed (tokenized) data
name: &name PretrainingBERTFromPreprocessed
trainer:
  gpus: 7 # the number of gpus, 0 for CPU, or a list of gpu indices
  num_nodes: 1
  max_steps: 49874565 # takes precedence over max_epochs
  max_epochs: 10
  num_sanity_val_steps: 1000 # needed for BERT pretraining from preprocessed data
  replace_sampler_ddp: false # needed for BERT pretraining from preprocessed data
  accumulate_grad_batches: 1 # accumulates grads every k batches
  precision: 16 # 16 to use AMP
  accelerator: ddp
  gradient_clip_val: 1.0
  log_every_n_steps: 1
  val_check_interval: 1.0 # 1.0 checks once per epoch; 0.25 checks 4 times per epoch
  checkpoint_callback: false # provided by exp_manager
  logger: false # provided by exp_manager
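# The trainer settings above are handed to pytorch_lightning.Trainer. With NeMo's
# Hydra-based launch scripts, any key in this file can typically be overridden from
# the command line with dotted syntax, e.g. (illustrative values):
#   trainer.gpus=8 trainer.precision=32 trainer.max_steps=100000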
model:
  nemo_path: 'bert_base_proteonemo.nemo' # exported .nemo path
  only_mlm_loss: true # use only the masked language model loss, without next sentence prediction
  num_tok_classification_layers: 1 # number of token classification head output layers
  num_seq_classification_layers: 2 # number of sequence classification head output layers
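  # With only_mlm_loss set to true the next-sentence-prediction objective is not used;
  # the two *_classification_layers values presumably size the token-level (MLM) and
  # sequence-level (NSP) output heads respectively.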
  language_model:
    pretrained_model_name: bert-base-uncased # huggingface model name
    lm_checkpoint: null
    config:
      attention_probs_dropout_prob: 0.1
      hidden_act: gelu
      hidden_dropout_prob: 0.1
      hidden_size: 768
      initializer_range: 0.02
      intermediate_size: 3072
      max_position_embeddings: 1024
      num_attention_heads: 12
      num_hidden_layers: 12
      type_vocab_size: 2
      vocab_size: 31
    config_file: null # json file, precedence over config
  tokenizer: null
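  # The config block mirrors a Hugging Face style BertConfig: hidden_size,
  # num_hidden_layers and num_attention_heads match bert-base, max_position_embeddings
  # is raised to 1024 to match the preprocessed sequence length, and vocab_size 31
  # presumably covers the amino-acid tokens plus special tokens. A config_file (JSON),
  # if given, takes precedence over these values. tokenizer is left null because the
  # preprocessed HDF5 data is already tokenized.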
  train_ds:
    data_file: '/data/UNIREF50/hdf5_seq_len_1024_max_pred_160_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/uniref_50' # path to hdf5 file (or directory)
    max_predictions_per_seq: 160
    batch_size: 8
    shuffle: true
    num_samples: -1
    num_workers: 8
    drop_last: false
    pin_memory: false
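  # data_file points at HDF5 data produced by the preprocessing step; the directory
  # name records the parameters it was built with (seq_len 1024, max_pred 160,
  # masked_lm_prob 0.15, dupe_factor 5), and max_predictions_per_seq here should match
  # that max_pred value. batch_size is per GPU, so the effective global batch size is
  # batch_size * gpus * num_nodes * accumulate_grad_batches (8 * 7 * 1 * 1 = 56 with
  # the values above).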
  optim:
    name: adamw
    lr: 0.4375e-4
    weight_decay: 0.01
    sched:
      name: SquareRootAnnealing
      warmup_steps: null
      warmup_ratio: 0.01
      min_lr: 0.0
      last_epoch: -1
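  # AdamW with square-root annealing: since warmup_steps is null, warmup_ratio 0.01
  # warms the learning rate up over roughly 1% of max_steps, after which it is
  # annealed towards min_lr.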
exp_manager:
  exp_dir: null # where to store logs and checkpoints
  name: *name # name of the experiment
  create_tensorboard_logger: True
  create_checkpoint_callback: True
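# exp_manager attaches the logger and checkpoint callback that were disabled in the
# trainer block above; with exp_dir null, NeMo typically writes logs and checkpoints
# under ./nemo_experiments/<name>.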
hydra:
  run:
    dir: .
  job_logging:
    root:
      handlers: null
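# The hydra block keeps the working directory as-is (run.dir: .) and disables Hydra's
# own file log handlers. A minimal launch sketch follows (the script name below is
# illustrative; substitute the repository's actual pretraining entry point); any key
# above can be overridden with Hydra dotted notation:
#
#   python bert_pretraining_from_preprocessed.py \
#     trainer.gpus=8 \
#     model.train_ds.data_file=/path/to/preprocessed/uniref_50 \
#     exp_manager.exp_dir=./results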