forked from ottokart/punctuator2
-
Notifications
You must be signed in to change notification settings - Fork 9
/
parameters.yaml
40 lines (40 loc) · 1.47 KB
/
parameters.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Root data directory. It must contain the subdirectories "train_samples",
# "dev_samples", "test_samples" and "test_groundtruth".
# NOTE(review): this is an absolute, machine-specific path — adjust it for
# the local environment before running.
DATA_DIR: /Users/alp/Documents/Corpora/ted_data/punkProse_corpus/corpus
# Vocabulary file for each vocabularized feature, keyed by feature name.
# The files should be located in DATA_DIR.
# The entries must be indented under the key: at column 0 they would parse as
# separate top-level keys and leave FEATURE_VOCABULARIES null.
FEATURE_VOCABULARIES:
  word: vocabulary.txt
  pos: pos_vocabulary.txt
# Label in the data to predict.
OUTPUT_LABEL: punctuation_before

# Size of the output hidden layer.
NUM_HIDDEN_OUTPUT: 100
# Hidden layer size of each feature layer, keyed by feature name.
# The entries must be indented under the key: at column 0 they would parse as
# separate top-level keys and leave FEATURE_NUM_HIDDEN null.
FEATURE_NUM_HIDDEN:
  word: 100
  pos: 10
  pause_before: 2
  pause_before_norm: 2
  f0_mean: 100
  f0_range: 100
  f0_birange: 100
  f0_sd: 100
  i0_mean: 100
  i0_range: 100
  i0_birange: 100
  i0_sd: 100
  speech_rate_norm: 100
# Embedded vector size of vocabularized features, keyed by feature name.
# The entries must be indented under the key: at column 0 they would parse as
# separate top-level keys and leave FEATURE_EMB_SIZE null.
# NOTE(review): pause_before and f0_mean have no entry in FEATURE_VOCABULARIES
# and their LEVELED_FEATURES entries are commented out — confirm these sizes
# are still used.
FEATURE_EMB_SIZE:
  word: 100
  pos: 10
  pause_before: 10
  f0_mean: 10
# Features to train bidirectionally.
BIDIRECTIONAL_FEATURES:
  - word
  - f0_mean
  - i0_mean
# Discretization of prosodic features, keyed by feature name. Each value is a
# text file that lists the levels for that feature.
# Every entry below is commented out, so this key currently parses as null.
# NOTE(review): presumably null means "no leveled features" — confirm how the
# consuming code handles it.
# When enabling an entry, keep it indented under LEVELED_FEATURES.
LEVELED_FEATURES: # prosodic features are discretized using a text file that lists the levels.
#   pause_before: level_info/pause_levels_100.txt
#   f0_mean: level_info/semitone_levels_100.txt
#   f0_range: level_info/semitone_levels_100.txt
#   i0_mean: level_info/semitone_levels_100.txt
#   i0_range: level_info/semitone_levels_100.txt
#   speech_rate_norm: level_info/sr_levels_100.txt
# Learning rate.
LEARNING_RATE: 0.05

# Batch size.
BATCH_SIZE: 128

# Maximum sequence size in the dataset. Samples with size less than 50 are
# padded with empty tokens.
SAMPLE_SIZE: 50