Add samples page

rhasspy · Mar 26, 2021 · 8f3b3e0 · 8f3b3e0
1 parent 13311e2
commit 8f3b3e0
Show file tree

Hide file tree

Showing 346 changed files with 5,173 additions and 0 deletions.
diff --git a/index.html b/index.html
diff --git a/local/de-de/thorsten-glow_tts/README.md b/local/de-de/thorsten-glow_tts/README.md
@@ -0,0 +1,5 @@
+# German Larynx Voice (thorsten)
+
+[GlowTTS](https://github.com/rhasspy/glow-tts-train) model trained from the [thorsten dataset](https://github.com/thorstenMueller/deep-learning-german-tts/).
+
+Intended to be used with [gruut](https://github.com/rhasspy/gruut) (`de-de`).
diff --git a/local/de-de/thorsten-glow_tts/config.json b/local/de-de/thorsten-glow_tts/config.json
@@ -0,0 +1,57 @@
+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.00026855278715645745,
+    "betas": [
+        0.9,
+        0.98
+    ],
+    "eps": 1e-09,
+    "grad_clip": 5.0,
+    "warmup_steps": 4000,
+    "scheduler": "noam",
+    "batch_size": 32,
+    "fp16_run": false,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0.0,
+        "mel_fmax": 8000.0,
+        "signal_norm": false,
+        "convert_db_to_amp": false,
+        "do_dynamic_range_compression": false
+    },
+    "model": {
+        "num_symbols": 54,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "filter_channels_dp": 256,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "n_blocks_dec": 12,
+        "n_layers_enc": 6,
+        "n_heads": 2,
+        "p_dropout_dec": 0.05,
+        "dilation_rate": 1,
+        "kernel_size_dec": 5,
+        "n_block_layers": 4,
+        "n_sqz": 2,
+        "prenet": true,
+        "mean_only": true,
+        "hidden_channels_enc": 192,
+        "hidden_channels_dec": 192,
+        "window_size": 4,
+        "n_speakers": 1,
+        "n_split": 4,
+        "sigmoid_scale": false,
+        "block_length": null,
+        "gin_channels": 0
+    },
+    "version": 1,
+    "git_commit": "a303dc3"
+}
diff --git a/local/de-de/thorsten-glow_tts/phonemes.txt b/local/de-de/thorsten-glow_tts/phonemes.txt
@@ -0,0 +1,54 @@
+0 _
+1 |
+2 ‖
+3 #
+4 a
+5 aɪ̯
+6 aʊ̯
+7 aː
+8 b
+9 d
+10 d͡ʒ
+11 eː
+12 f
+13 g
+14 h
+15 iː
+16 j
+17 k
+18 l
+19 m
+20 n
+21 oː
+22 p
+23 p͡f
+24 s
+25 t
+26 t͡s
+27 t͡ʃ
+28 uː
+29 v
+30 x
+31 yː
+32 z
+33 ãː
+34 ç
+35 õː
+36 øː
+37 ŋ
+38 œ
+39 ɐ
+40 ɔ
+41 ɔʏ̯
+42 ə
+43 ɛ
+44 ɛː
+45 ɛ̃ː
+46 ɪ
+47 ʁ
+48 ʃ
+49 ʊ
+50 ʏ
+51 ʒ
+52 ʔ
+53 χ
diff --git a/local/de-de/thorsten-glow_tts/samples/fischers_fritze_fischt.wav b/local/de-de/thorsten-glow_tts/samples/fischers_fritze_fischt.wav
diff --git a/local/de-de/thorsten-glow_tts/samples/haben_sie_ein_vegetarisches.wav b/local/de-de/thorsten-glow_tts/samples/haben_sie_ein_vegetarisches.wav
diff --git a/local/de-de/thorsten-glow_tts/samples/ich_bin_allergisch.wav b/local/de-de/thorsten-glow_tts/samples/ich_bin_allergisch.wav
diff --git a/local/de-de/thorsten-glow_tts/samples/konnen_sie_bitte.wav b/local/de-de/thorsten-glow_tts/samples/konnen_sie_bitte.wav
diff --git a/local/de-de/thorsten-glow_tts/samples/mir_geht_es_gut.wav b/local/de-de/thorsten-glow_tts/samples/mir_geht_es_gut.wav
diff --git a/local/de-de/thorsten-glow_tts/samples/test_phonemes.csv b/local/de-de/thorsten-glow_tts/samples/test_phonemes.csv
@@ -0,0 +1,5 @@
+konnen_sie_bitte|3 17 38 20 42 20 3 32 15 3 8 46 25 42 3 18 4 37 32 7 19 39 3 48 22 47 43 34 20 3 2
+mir_geht_es_gut|3 19 15 39 3 13 11 25 3 52 11 24 3 13 28 25 3 1 3 9 4 37 17 42 3 2
+haben_sie_ein_vegetarisches|3 14 7 8 42 20 3 32 15 3 5 20 3 29 11 13 11 25 4 47 46 48 42 24 3 13 42 47 46 34 25 3 2
+ich_bin_allergisch|3 46 34 3 8 46 20 3 52 4 18 18 43 47 13 46 48 3 2
+fischers_fritze_fischt|3 12 46 48 43 47 24 3 12 47 46 25 24 42 3 12 46 48 25 3 12 47 46 48 42 3 12 46 48 42 3 1 3 12 47 46 48 42 3 12 46 48 42 3 12 46 48 25 3 12 46 48 43 47 24 3 12 47 46 25 24 42 3 2
diff --git a/local/de-de/thorsten-glow_tts/samples/test_sentences.txt b/local/de-de/thorsten-glow_tts/samples/test_sentences.txt
@@ -0,0 +1,5 @@
+konnen_sie_bitte|Können Sie bitte langsamer sprechen?
+mir_geht_es_gut|Mir geht es gut, danke!
+haben_sie_ein_vegetarisches|Haben Sie ein vegetarisches Gericht?
+ich_bin_allergisch|Ich bin allergisch.
+fischers_fritze_fischt|Fischers Fritze fischt frische Fische; Frische Fische fischt Fischers Fritze.
diff --git a/local/en-us/blizzard_fls-glow_tts/README.md b/local/en-us/blizzard_fls-glow_tts/README.md
@@ -0,0 +1,5 @@
+# English Larynx Voice (blizzard_fls)
+
+[GlowTTS](https://github.com/rhasspy/glow-tts-train) model trained from the [Blizzard 2017 Challenge dataset](https://www.synsig.org/index.php/Blizzard_Challenge_2017).
+
+Intended to be used with [gruut](https://github.com/rhasspy/gruut) (`en-us`).
diff --git a/local/en-us/blizzard_fls-glow_tts/config.json b/local/en-us/blizzard_fls-glow_tts/config.json
@@ -0,0 +1,64 @@
+{
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0005261910343112216,
+    "betas": [
+        0.9,
+        0.98
+    ],
+    "eps": 1e-09,
+    "grad_clip": 5.0,
+    "warmup_steps": 4000,
+    "scheduler": "noam",
+    "batch_size": 32,
+    "fp16_run": false,
+    "min_seq_length": null,
+    "max_seq_length": null,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0.0,
+        "mel_fmax": 8000.0,
+        "ref_level_db": 20.0,
+        "spec_gain": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100.0,
+        "max_norm": 1.0,
+        "clip_norm": true,
+        "symmetric_norm": true
+    },
+    "model": {
+        "num_symbols": 46,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "filter_channels_dp": 256,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "n_blocks_dec": 12,
+        "n_layers_enc": 6,
+        "n_heads": 2,
+        "p_dropout_dec": 0.05,
+        "dilation_rate": 1,
+        "kernel_size_dec": 5,
+        "n_block_layers": 4,
+        "n_sqz": 2,
+        "prenet": true,
+        "mean_only": true,
+        "hidden_channels_enc": 192,
+        "hidden_channels_dec": 192,
+        "window_size": 4,
+        "n_speakers": 1,
+        "n_split": 4,
+        "sigmoid_scale": false,
+        "block_length": null,
+        "gin_channels": 0,
+        "n_frames_per_step": 1
+    },
+    "version": 1,
+    "git_commit": "b05e772"
+}
diff --git a/local/en-us/blizzard_fls-glow_tts/phonemes.txt b/local/en-us/blizzard_fls-glow_tts/phonemes.txt
@@ -0,0 +1,46 @@
+0 _
+1 |
+2 ‖
+3 #
+4 ˈ
+5 ˌ
+6 aɪ
+7 aʊ
+8 b
+9 d
+10 d͡ʒ
+11 eɪ
+12 f
+13 h
+14 i
+15 j
+16 k
+17 l
+18 m
+19 n
+20 oʊ
+21 p
+22 s
+23 t
+24 t͡ʃ
+25 u
+26 v
+27 w
+28 z
+29 æ
+30 ð
+31 ŋ
+32 ɑ
+33 ɔ
+34 ɔɪ
+35 ə
+36 ɚ
+37 ɛ
+38 ɡ
+39 ɪ
+40 ɹ
+41 ʃ
+42 ʊ
+43 ʌ
+44 ʒ
+45 θ
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/be_a_voice_not_an_echo.wav b/local/en-us/blizzard_fls-glow_tts/samples/be_a_voice_not_an_echo.wav
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/im_sorry_dave.wav b/local/en-us/blizzard_fls-glow_tts/samples/im_sorry_dave.wav
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/it_took_me_quite_a_long_time_to_develop_a_voice.wav b/local/en-us/blizzard_fls-glow_tts/samples/it_took_me_quite_a_long_time_to_develop_a_voice.wav
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/prior_to_november.wav b/local/en-us/blizzard_fls-glow_tts/samples/prior_to_november.wav
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/test_phonemes.csv b/local/en-us/blizzard_fls-glow_tts/samples/test_phonemes.csv
@@ -0,0 +1,5 @@
+it_took_me_quite_a_long_time_to_develop_a_voice|3 4 39 23 3 23 4 42 16 3 18 4 14 3 16 27 4 6 23 3 35 3 17 4 33 31 3 23 4 6 18 3 23 4 25 3 9 39 26 4 37 17 35 21 3 35 3 26 4 34 22 3 1 3 35 19 9 3 19 4 7 3 30 4 29 23 3 4 6 3 13 4 29 26 3 4 39 23 3 4 6 18 3 19 4 32 23 3 38 4 20 39 31 3 23 4 25 3 8 4 14 3 22 4 6 17 35 19 23 3 2
+be_a_voice_not_an_echo|3 8 4 14 3 35 3 26 4 34 22 3 1 3 19 4 32 23 3 35 19 3 4 37 16 20 3 2
+im_sorry_dave|3 4 6 18 3 22 4 32 40 14 3 9 4 11 26 3 2 4 6 18 3 35 12 40 4 11 9 3 4 6 3 16 4 29 19 23 3 9 4 25 3 30 4 29 23 3 2
+this_cake_is_great|3 30 4 39 22 3 16 4 11 16 3 4 39 28 3 38 40 4 11 23 3 2 4 39 23 22 3 22 4 20 3 9 39 17 4 39 41 35 22 3 35 19 9 3 18 4 34 22 23 3 2
+prior_to_november|3 21 40 4 6 36 3 23 4 25 3 19 20 26 4 37 18 8 36 3 23 27 4 37 19 23 14 3 22 4 37 16 35 19 9 3 1 3 19 4 6 19 23 4 14 19 3 22 4 39 16 22 23 14 3 45 40 4 14 3 2
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/test_sentences.txt b/local/en-us/blizzard_fls-glow_tts/samples/test_sentences.txt
@@ -0,0 +1,5 @@
+it_took_me_quite_a_long_time_to_develop_a_voice|It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.
+be_a_voice_not_an_echo|Be a voice, not an echo.
+im_sorry_dave|I'm sorry Dave. I'm afraid I can't do that.
+this_cake_is_great|This cake is great. It's so delicious and moist.
+prior_to_november|Prior to November 22_ordinal, 1963_year.
diff --git a/local/en-us/blizzard_fls-glow_tts/samples/this_cake_is_great.wav b/local/en-us/blizzard_fls-glow_tts/samples/this_cake_is_great.wav
diff --git a/local/en-us/cmu_aew-glow_tts/README.md b/local/en-us/cmu_aew-glow_tts/README.md
@@ -0,0 +1,5 @@
+# English Larynx Voice (aew)
+
+[GlowTTS](https://github.com/rhasspy/glow-tts-train) model trained from the [CMU Arctic](http://www.festvox.org/cmu_arctic/) dataset.
+
+Intended to be used with [gruut](https://github.com/rhasspy/gruut) (`en-us`).
diff --git a/local/en-us/cmu_aew-glow_tts/config.json b/local/en-us/cmu_aew-glow_tts/config.json
@@ -0,0 +1,64 @@
+{
+    "seed": 1234,
+    "epochs": 1000,
+    "learning_rate": 0.0003184746968987751,
+    "betas": [
+        0.9,
+        0.98
+    ],
+    "eps": 1e-09,
+    "grad_clip": 5.0,
+    "warmup_steps": 4000,
+    "scheduler": "noam",
+    "batch_size": 32,
+    "fp16_run": false,
+    "min_seq_length": null,
+    "max_seq_length": 153,
+    "audio": {
+        "filter_length": 1024,
+        "hop_length": 256,
+        "win_length": 1024,
+        "mel_channels": 80,
+        "sample_rate": 22050,
+        "sample_bytes": 2,
+        "channels": 1,
+        "mel_fmin": 0.0,
+        "mel_fmax": 8000.0,
+        "ref_level_db": 20.0,
+        "spec_gain": 1.0,
+        "signal_norm": true,
+        "min_level_db": -100.0,
+        "max_norm": 1.0,
+        "clip_norm": true,
+        "symmetric_norm": true
+    },
+    "model": {
+        "num_symbols": 46,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "filter_channels_dp": 256,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "n_blocks_dec": 12,
+        "n_layers_enc": 6,
+        "n_heads": 2,
+        "p_dropout_dec": 0.05,
+        "dilation_rate": 1,
+        "kernel_size_dec": 5,
+        "n_block_layers": 4,
+        "n_sqz": 2,
+        "prenet": true,
+        "mean_only": true,
+        "hidden_channels_enc": 192,
+        "hidden_channels_dec": 192,
+        "window_size": 4,
+        "n_speakers": 1,
+        "n_split": 4,
+        "sigmoid_scale": false,
+        "block_length": null,
+        "gin_channels": 0,
+        "n_frames_per_step": 1
+    },
+    "version": 1,
+    "git_commit": "b05e772"
+}
diff --git a/local/en-us/cmu_aew-glow_tts/phonemes.txt b/local/en-us/cmu_aew-glow_tts/phonemes.txt
@@ -0,0 +1,46 @@
+0 _
+1 |
+2 ‖
+3 #
+4 ˈ
+5 ˌ
+6 aɪ
+7 aʊ
+8 b
+9 d
+10 d͡ʒ
+11 eɪ
+12 f
+13 h
+14 i
+15 j
+16 k
+17 l
+18 m
+19 n
+20 oʊ
+21 p
+22 s
+23 t
+24 t͡ʃ
+25 u
+26 v
+27 w
+28 z
+29 æ
+30 ð
+31 ŋ
+32 ɑ
+33 ɔ
+34 ɔɪ
+35 ə
+36 ɚ
+37 ɛ
+38 ɡ
+39 ɪ
+40 ɹ
+41 ʃ
+42 ʊ
+43 ʌ
+44 ʒ
+45 θ
diff --git a/local/en-us/cmu_aew-glow_tts/samples/be_a_voice_not_an_echo.wav b/local/en-us/cmu_aew-glow_tts/samples/be_a_voice_not_an_echo.wav
diff --git a/local/en-us/cmu_aew-glow_tts/samples/im_sorry_dave.wav b/local/en-us/cmu_aew-glow_tts/samples/im_sorry_dave.wav
diff --git a/local/en-us/cmu_aew-glow_tts/samples/it_took_me_quite_a_long_time_to_develop_a_voice.wav b/local/en-us/cmu_aew-glow_tts/samples/it_took_me_quite_a_long_time_to_develop_a_voice.wav
diff --git a/local/en-us/cmu_aew-glow_tts/samples/prior_to_november.wav b/local/en-us/cmu_aew-glow_tts/samples/prior_to_november.wav
diff --git a/local/en-us/cmu_aew-glow_tts/samples/test_phonemes.csv b/local/en-us/cmu_aew-glow_tts/samples/test_phonemes.csv
@@ -0,0 +1,5 @@
+it_took_me_quite_a_long_time_to_develop_a_voice|3 4 39 23 3 23 4 42 16 3 18 4 14 3 16 27 4 6 23 3 35 3 17 4 33 31 3 23 4 6 18 3 23 4 25 3 9 39 26 4 37 17 35 21 3 35 3 26 4 34 22 3 1 3 35 19 9 3 19 4 7 3 30 4 29 23 3 4 6 3 13 4 29 26 3 4 39 23 3 4 6 18 3 19 4 32 23 3 38 4 20 39 31 3 23 4 25 3 8 4 14 3 22 4 6 17 35 19 23 3 2
+be_a_voice_not_an_echo|3 8 4 14 3 35 3 26 4 34 22 3 1 3 19 4 32 23 3 35 19 3 4 37 16 20 3 2
+im_sorry_dave|3 4 6 18 3 22 4 32 40 14 3 9 4 11 26 3 2 4 6 18 3 35 12 40 4 11 9 3 4 6 3 16 4 29 19 23 3 9 4 25 3 30 4 29 23 3 2
+this_cake_is_great|3 30 4 39 22 3 16 4 11 16 3 4 39 28 3 38 40 4 11 23 3 2 4 39 23 22 3 22 4 20 3 9 39 17 4 39 41 35 22 3 35 19 9 3 18 4 34 22 23 3 2
+prior_to_november|3 21 40 4 6 36 3 23 4 25 3 19 20 26 4 37 18 8 36 3 23 27 4 37 19 23 14 3 22 4 37 16 35 19 9 3 1 3 19 4 6 19 23 4 14 19 3 22 4 39 16 22 23 14 3 45 40 4 14 3 2
diff --git a/local/en-us/cmu_aew-glow_tts/samples/test_sentences.txt b/local/en-us/cmu_aew-glow_tts/samples/test_sentences.txt
@@ -0,0 +1,5 @@
+it_took_me_quite_a_long_time_to_develop_a_voice|It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.
+be_a_voice_not_an_echo|Be a voice, not an echo.
+im_sorry_dave|I'm sorry Dave. I'm afraid I can't do that.
+this_cake_is_great|This cake is great. It's so delicious and moist.
+prior_to_november|Prior to November 22_ordinal, 1963_year.
diff --git a/local/en-us/cmu_aew-glow_tts/samples/this_cake_is_great.wav b/local/en-us/cmu_aew-glow_tts/samples/this_cake_is_great.wav
diff --git a/local/en-us/cmu_ahw-glow_tts/README.md b/local/en-us/cmu_ahw-glow_tts/README.md
@@ -0,0 +1,5 @@
+# English Larynx Voice (ahw)
+
+[GlowTTS](https://github.com/rhasspy/glow-tts-train) model trained from the [CMU Arctic](http://www.festvox.org/cmu_arctic/) dataset.
+
+Intended to be used with [gruut](https://github.com/rhasspy/gruut) (`en-us`).
-Original file line number
+Diff line change
@@ -0,0 +1,54 @@
+_
+|
+‖
+#
+a
+aɪ̯
+aʊ̯
+aː
+b
+d
+d͡ʒ
+eː
+f
+g
+h
+iː
+j
+k
+l
+m
+n
+oː
+p
+p͡f
+s
+t
+t͡s
+t͡ʃ
+uː
+v
+x
+yː
+z
+ãː
+ç
+õː
+øː
+ŋ
+œ
+ɐ
+ɔ
+ɔʏ̯
+ə
+ɛ
+ɛː
+ɛ̃ː
+ɪ
+ʁ
+ʃ
+ʊ
+ʏ
+ʒ
+ʔ
+χ