system.config

### Prefix a line with # to comment out a configuration item.

################ Status ################
mode=train
# string, one of: train / test / interactive_predict / api_service

################ Datasets(Input/Output) ################
datasets_fold=data/example_datasets2
# NOTE(review): presumably the folder that contains train_file/dev_file/test_file -- confirm
train_file=train.csv
dev_file=dev.csv
test_file=test.csv

delimiter=b
# string:
#   t     -> tab ("\t")
#   b     -> blank space (" ")
#   other -> any other string, e.g., '|||'

use_pretrained_embedding=False
token_emb_dir=data/example_datasets2/word.emb
# NOTE(review): presumably only needed when use_pretrained_embedding=True -- confirm

vocabs_dir=data/example_datasets2/vocabs

log_dir=data/example_datasets2/logs

checkpoints_dir=checkpoints/BILSTM-CRFs-datasets2

################ Labeling Scheme ################
label_scheme=BIO
# string: BIO/BIESO

label_level=2
# int, 1: BIO/BIESO only; 2: BIO/BIESO + suffix
# the maximum value is 2

hyphen=-
# string: - or _, connects the prefix and the suffix,
# e.g., `B-PER', `I-LOC' with -, or `B_PER', `I_LOC' with _

suffix=[ORG,PER,LOC]
# not needed if label_level=1

labeling_level=char
# string: word/char
# for English: (word: hello), (char: h)
# for Chinese: (word: 你好), (char: 你)

measuring_metrics=[precision,recall,f1,accuracy]
# list of strings chosen from: accuracy, precision, recall, f1
# f1 is mandatory and must always be listed
################ Model Configuration ################
use_crf=True

cell_type=LSTM
# string: LSTM/GRU
biderectional=True
# NOTE(review): the key is spelled `biderectional' (sic); do not rename it here
# unless the code that reads this file is changed as well.
encoder_layers=1

embedding_dim=200
# int, must be consistent with the `token_emb_dir' file

hidden_dim=200

max_sequence_length=300
# int, caution: set this as large as practical;
# the value stays fixed during training and inference, and any text longer than this is truncated.

use_self_attention=True
attention_dim=400
# not needed if use_self_attention=False

CUDA_VISIBLE_DEVICES=0
# corresponds to CUDA_VISIBLE_DEVICES as used by TensorFlow (tf)

seed=42

################ Training Settings ################
epoch=300
batch_size=32

dropout=0.5
learning_rate=0.001

optimizer=Adam
# string: GD/Adagrad/AdaDelta/RMSprop/Adam

checkpoints_max_to_keep=3
print_per_batch=20

is_early_stop=True
patient=15
# not needed if is_early_stop=False
# NOTE(review): presumably the early-stopping patience; the key is spelled `patient' -- confirm against the reader

checkpoint_name=model

################ Testing Settings ################
output_test_file=test.out

is_output_sentence_entity=True
output_sentence_entity_file=test.entity.out
# not needed if is_output_sentence_entity=False


################ API Service Settings ################

ip=0.0.0.0
port=8000
# no need to change these if the defaults are acceptable.
# these items are not needed when mode is not api_service.