From 43dd3fe6128d372b3aef43c590720c8b410279b3 Mon Sep 17 00:00:00 2001 From: cdhigh Date: Sun, 21 Apr 2024 22:35:46 -0300 Subject: [PATCH] add tts --- application/lib/calibre/web/feeds/news.py | 27 +- application/lib/calibre/web/fetch/simple.py | 6 + .../lib/ebook_translator/engines/base.py | 4 +- application/lib/ebook_tts/engines/__init__.py | 3 + application/lib/ebook_tts/engines/azure.py | 253 +++++++++++++++++ application/lib/ebook_tts/engines/google.py | 260 +++++++++++------- application/lib/ebook_tts/engines/tts_base.py | 50 ++++ application/lib/ebook_tts/html_audiolator.py | 6 +- application/lib/ssml_builder.py | 227 --------------- application/static/base.js | 105 +++++-- application/templates/book_audiolator.html | 71 +++-- application/templates/book_translator.html | 7 +- application/view/adv.py | 1 - application/view/translator.py | 3 +- application/work/worker.py | 35 ++- docker/Dockerfile | 8 +- tools/mp3cat/mp3cat | Bin 0 -> 2185664 bytes tools/mp3cat/mp3cat.exe | Bin 0 -> 2577920 bytes tools/mp3cat/readme.md | 2 + 19 files changed, 671 insertions(+), 397 deletions(-) create mode 100644 application/lib/ebook_tts/engines/azure.py create mode 100644 application/lib/ebook_tts/engines/tts_base.py delete mode 100644 application/lib/ssml_builder.py create mode 100644 tools/mp3cat/mp3cat create mode 100644 tools/mp3cat/mp3cat.exe create mode 100644 tools/mp3cat/readme.md diff --git a/application/lib/calibre/web/feeds/news.py b/application/lib/calibre/web/feeds/news.py index 093c9689..be91aeab 100644 --- a/application/lib/calibre/web/feeds/news.py +++ b/application/lib/calibre/web/feeds/news.py @@ -90,6 +90,7 @@ def __init__(self): self.preprocess_raw_html = None self.get_delay = None self.max_files = None + self.keep_image = True #每篇文章的下载任务参数 @@ -162,7 +163,7 @@ class BasicNewsRecipe(Recipe): #: that have overly complex stylesheets unsuitable for conversion #: to e-book formats. #: If True stylesheets are not downloaded and processed - no_stylesheets = False + no_stylesheets = True #: Convenient flag to strip all JavaScript tags from the downloaded HTML remove_javascript = True @@ -433,6 +434,8 @@ class BasicNewsRecipe(Recipe): #: Set to False if you do not want to use gzipped transfers. Note that some old servers flake out with gzip handle_gzip = True + keep_image = True + # set by worker.py translator = {} tts = {} @@ -966,7 +969,7 @@ def __init__(self, options, log, output_dir, fs, feed_index_start=0): self.web2disk_options = wOpts = Web2diskOptions() for attr in ('keep_only_tags', 'remove_tags', 'preprocess_regexps', 'skip_ad_pages', 'preprocess_html', - 'remove_tags_after', 'remove_tags_before', 'is_link_wanted', 'compress_news_images', + 'remove_tags_after', 'remove_tags_before', 'is_link_wanted', 'compress_news_images', 'keep_image', 'compress_news_images_max_size', 'compress_news_images_auto_size', 'scale_news_images', 'filter_regexps', 'match_regexps', 'no_stylesheets', 'verbose', 'delay', 'timeout', 'recursions', 'encoding'): setattr(wOpts, attr, getattr(self, attr)) @@ -1083,11 +1086,12 @@ def _postprocess_html(self, soup, first_fetch, job_info): x.name = 'div' #If tts need, tts propery is set by WorkerImpl - if self.tts.get('enable'): + tts_enable = self.tts.get('enable') + if tts_enable: self.audiofy_html(soup, title, job_info) #If translation need, translator propery is set by WorkerImpl - if self.translator.get('enable'): + if self.translator.get('enable') and (tts_enable != 'audio_only'): self.translate_html(soup, title) if job_info: @@ -1204,6 +1208,9 @@ def download(self): return res finally: self.cleanup() + #如果设置为仅推送音频,则删掉feed实例 + if self.tts.get('enable') == 'audio_only': + self.feed_objects = [] @property def lang_for_html(self): @@ -1298,6 +1305,8 @@ def _fetch_article(self, job_info, preloaded=None): br = self.browser self.web2disk_options.browser = br self.web2disk_options.dir = job_info.art_dir + if self.tts.get('enable') == 'audio_only': + self.web2disk_options.keep_image = False fetcher = RecursiveFetcher(self.web2disk_options, self.fs, self.log, job_info, self.image_map, self.css_map) fetcher.browser = br @@ -1992,7 +2001,7 @@ def internal_postprocess_book(self, oeb, opts, log): def translate_html(self, soup, title): from ebook_translator import HtmlTranslator translator = HtmlTranslator(self.translator, self.simultaneous_downloads) - self.log.debug(f'Translating [{title}]') + self.log.info(f'Translating html [{title}]') translator.translate_soup(soup) #翻译Feed的title,toc时用到 @@ -2017,24 +2026,26 @@ def translate_titles(self, feeds): #调用在线TTS服务平台,将html转为语音 #每个音频片段都会调用一次callback(audioDict, title, feed_index, article_index) def audiofy_html(self, soup, title, job_info): - default_log.info(f'audiofy_html {title}') from ebook_tts import HtmlAudiolator audiolator = HtmlAudiolator(self.tts) - self.log.debug(f'Translating [{title}]') + self.log.info(f'Audiofying html [{title}]') ret = audiolator.audiofy_soup(soup) if not ret['error']: #保存音频到磁盘,这个地方就不能使用fs了,因为最后合并mp3时无法使用虚拟文件系统 if not self.tts.get('audio_dir'): system_temp_dir = os.environ.get('KE_TEMP_DIR') self.tts['audio_dir'] = PersistentTemporaryDirectory(prefix='tts_', dir=system_temp_dir) + if not self.tts.get('audios'): + self.tts['audios'] = [] audio_dir = self.tts['audio_dir'] ext = ret['mime'].split('/')[-1] ext = {'mpeg': 'mp3'}.get(ext, ext) for idx, audio in enumerate(ret['audios']): - filename = f'{job_info.f_idx:04d}_{job_info.a_idx:04d}_{idx:04d}.{ext}' + filename = f'{job_info.f_idx:03d}_{job_info.a_idx:03d}_{idx:04d}.{ext}' filename = os.path.join(audio_dir, filename) try: with open(filename, 'wb') as f: f.write(audio) + self.tts['audios'].append(filename) except Exception as e: self.log.warning(f'Failed to write "{filename}": {e}') else: diff --git a/application/lib/calibre/web/fetch/simple.py b/application/lib/calibre/web/fetch/simple.py index 38b1a99a..e604e88a 100644 --- a/application/lib/calibre/web/fetch/simple.py +++ b/application/lib/calibre/web/fetch/simple.py @@ -180,6 +180,7 @@ def __init__(self, options, fs, log, job_info=None, image_map=None, css_map=None self.scale_news_images = getattr(options, 'scale_news_images', None) self.get_delay = getattr(options, 'get_delay', None) self.download_stylesheets = not options.no_stylesheets + self.keep_image = options.keep_image self.show_progress = False self.failed_links = [] self.job_info = job_info @@ -401,6 +402,11 @@ def rescale_image(self, data): return rescale_image(data, self.scale_news_images, self.compress_news_images_max_size, self.compress_news_images_auto_size) def process_images(self, soup, baseurl): + if not self.keep_image: + for tag in soup.find_all('img'): + tag.decompose() + return + diskpath = unicode_path(os.path.join(self.current_dir, 'images')) self.fs.mkdir(diskpath) diff --git a/application/lib/ebook_translator/engines/base.py b/application/lib/ebook_translator/engines/base.py index 4e47534c..d3790ba9 100644 --- a/application/lib/ebook_translator/engines/base.py +++ b/application/lib/ebook_translator/engines/base.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -import ssl -import os.path -import traceback +import os, traceback from urllib.parse import urljoin from urlopener import UrlOpener diff --git a/application/lib/ebook_tts/engines/__init__.py b/application/lib/ebook_tts/engines/__init__.py index 8f5dc74a..72147c8f 100644 --- a/application/lib/ebook_tts/engines/__init__.py +++ b/application/lib/ebook_tts/engines/__init__.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- +from .tts_base import TTSBase +from .azure import AzureTTS from .google import GoogleWebTTSFree, GoogleTextToSpeech builtin_tts_engines = { + AzureTTS.name: AzureTTS, GoogleWebTTSFree.name: GoogleWebTTSFree, GoogleTextToSpeech.name: GoogleTextToSpeech, } diff --git a/application/lib/ebook_tts/engines/azure.py b/application/lib/ebook_tts/engines/azure.py new file mode 100644 index 00000000..a7ec2ef3 --- /dev/null +++ b/application/lib/ebook_tts/engines/azure.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +#Azure text-to-speech +#https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech +import json +from urllib.parse import urljoin +from urlopener import UrlOpener +from .tts_base import TTSBase + +#键为BCP-47语种代码,值为语音名字列表 +azuretts_languages = { + 'af-ZA': ['af-ZA-AdriNeural', 'af-ZA-WillemNeural'], + 'am-ET': ['am-ET-MekdesNeural', 'am-ET-AmehaNeural'], + 'ar-AE': ['ar-AE-FatimaNeural', 'ar-AE-HamdanNeural'], + 'ar-BH': ['ar-BH-LailaNeural', 'ar-BH-AliNeural'], + 'ar-DZ': ['ar-DZ-AminaNeural', 'ar-DZ-IsmaelNeural'], + 'ar-EG': ['ar-EG-SalmaNeural', 'ar-EG-ShakirNeural'], + 'ar-IQ': ['ar-IQ-RanaNeural', 'ar-IQ-BasselNeural'], + 'ar-JO': ['ar-JO-SanaNeural', 'ar-JO-TaimNeural'], + 'ar-KW': ['ar-KW-NouraNeural', 'ar-KW-FahedNeural'], + 'ar-LB': ['ar-LB-LaylaNeural', 'ar-LB-RamiNeural'], + 'ar-LY': ['ar-LY-ImanNeural', 'ar-LY-OmarNeural'], + 'ar-MA': ['ar-MA-MounaNeural', 'ar-MA-JamalNeural'], + 'ar-OM': ['ar-OM-AyshaNeural', 'ar-OM-AbdullahNeural'], + 'ar-QA': ['ar-QA-AmalNeural', 'ar-QA-MoazNeural'], + 'ar-SA': ['ar-SA-ZariyahNeural', 'ar-SA-HamedNeural'], + 'ar-SY': ['ar-SY-AmanyNeural', 'ar-SY-LaithNeural'], + 'ar-TN': ['ar-TN-ReemNeural', 'ar-TN-HediNeural'], + 'ar-YE': ['ar-YE-MaryamNeural', 'ar-YE-SalehNeural'], + 'az-AZ': ['az-AZ-BanuNeural', 'az-AZ-BabekNeural'], + 'bg-BG': ['bg-BG-KalinaNeural', 'bg-BG-BorislavNeural'], + 'bn-BD': ['bn-BD-NabanitaNeural', 'bn-BD-PradeepNeural'], + 'bn-IN': ['bn-IN-TanishaaNeural', 'bn-IN-BashkarNeural'], + 'bs-BA': ['bs-BA-VesnaNeural', 'bs-BA-GoranNeural'], + 'ca-ES': ['ca-ES-JoanaNeural', 'ca-ES-EnricNeural', 'ca-ES-AlbaNeural'], + 'cs-CZ': ['cs-CZ-VlastaNeural', 'cs-CZ-AntoninNeural'], + 'cy-GB': ['cy-GB-NiaNeural', 'cy-GB-AledNeural'], + 'da-DK': ['da-DK-ChristelNeural', 'da-DK-JeppeNeural'], + 'de-AT': ['de-AT-IngridNeural', 'de-AT-JonasNeural'], + 'de-CH': ['de-CH-LeniNeural', 'de-CH-JanNeural'], + 'de-DE': ['de-DE-KatjaNeural', 'de-DE-ConradNeural', 'de-DE-AmalaNeural', 'de-DE-BerndNeural', 'de-DE-ChristophNeural', 'de-DE-ElkeNeural', 'de-DE-FlorianMultilingualNeural', 'de-DE-GiselaNeural', 'de-DE-KasperNeural', 'de-DE-KillianNeural', 'de-DE-KlarissaNeural', 'de-DE-KlausNeural', 'de-DE-LouisaNeural', 'de-DE-MajaNeural', 'de-DE-RalfNeural', 'de-DE-SeraphinaMultilingualNeural', 'de-DE-TanjaNeural'], + 'el-GR': ['el-GR-AthinaNeural', 'el-GR-NestorasNeural'], + 'en-AU': ['en-AU-NatashaNeural', 'en-AU-WilliamNeural', 'en-AU-AnnetteNeural', 'en-AU-CarlyNeural', 'en-AU-DarrenNeural', 'en-AU-DuncanNeural', 'en-AU-ElsieNeural', 'en-AU-FreyaNeural', 'en-AU-JoanneNeural', 'en-AU-KenNeural', 'en-AU-KimNeural', 'en-AU-NeilNeural', 'en-AU-TimNeural', 'en-AU-TinaNeural'], + 'en-CA': ['en-CA-ClaraNeural', 'en-CA-LiamNeural'], + 'en-GB': ['en-GB-SoniaNeural', 'en-GB-RyanNeural', 'en-GB-LibbyNeural', 'en-GB-AbbiNeural', 'en-GB-AlfieNeural', 'en-GB-BellaNeural', 'en-GB-ElliotNeural', 'en-GB-EthanNeural', 'en-GB-HollieNeural', 'en-GB-MaisieNeural', 'en-GB-NoahNeural', 'en-GB-OliverNeural', 'en-GB-OliviaNeural', 'en-GB-ThomasNeural', 'en-GB-MiaNeural'], + 'en-HK': ['en-HK-YanNeural', 'en-HK-SamNeural'], + 'en-IE': ['en-IE-EmilyNeural', 'en-IE-ConnorNeural'], + 'en-IN': ['en-IN-NeerjaNeural', 'en-IN-PrabhatNeural'], + 'en-KE': ['en-KE-AsiliaNeural', 'en-KE-ChilembaNeural'], + 'en-NG': ['en-NG-EzinneNeural', 'en-NG-AbeoNeural'], + 'en-NZ': ['en-NZ-MollyNeural', 'en-NZ-MitchellNeural'], + 'en-PH': ['en-PH-RosaNeural', 'en-PH-JamesNeural'], + 'en-SG': ['en-SG-LunaNeural', 'en-SG-WayneNeural'], + 'en-TZ': ['en-TZ-ImaniNeural', 'en-TZ-ElimuNeural'], + 'en-US': ['en-US-AvaMultilingualNeural', 'en-US-AndrewMultilingualNeural', 'en-US-EmmaMultilingualNeural', 'en-US-BrianMultilingualNeural', 'en-US-AvaNeural', 'en-US-AndrewNeural', 'en-US-EmmaNeural', 'en-US-BrianNeural', 'en-US-JennyNeural', 'en-US-GuyNeural', 'en-US-AriaNeural', 'en-US-DavisNeural', 'en-US-JaneNeural', 'en-US-JasonNeural', 'en-US-SaraNeural', 'en-US-TonyNeural', 'en-US-NancyNeural', 'en-US-AmberNeural', 'en-US-AnaNeural', 'en-US-AshleyNeural', 'en-US-BrandonNeural', 'en-US-ChristopherNeural', 'en-US-CoraNeural', 'en-US-ElizabethNeural', 'en-US-EricNeural', 'en-US-JacobNeural', 'en-US-JennyMultilingualNeural', 'en-US-MichelleNeural', 'en-US-MonicaNeural', 'en-US-RogerNeural', 'en-US-RyanMultilingualNeural', 'en-US-SteffanNeural'], + 'en-ZA': ['en-ZA-LeahNeural', 'en-ZA-LukeNeural'], + 'es-AR': ['es-AR-ElenaNeural', 'es-AR-TomasNeural'], + 'es-BO': ['es-BO-SofiaNeural', 'es-BO-MarceloNeural'], + 'es-CL': ['es-CL-CatalinaNeural', 'es-CL-LorenzoNeural'], + 'es-CO': ['es-CO-SalomeNeural', 'es-CO-GonzaloNeural'], + 'es-CR': ['es-CR-MariaNeural', 'es-CR-JuanNeural'], + 'es-CU': ['es-CU-BelkysNeural', 'es-CU-ManuelNeural'], + 'es-DO': ['es-DO-RamonaNeural', 'es-DO-EmilioNeural'], + 'es-EC': ['es-EC-AndreaNeural', 'es-EC-LuisNeural'], + 'es-ES': ['es-ES-ElviraNeural', 'es-ES-AlvaroNeural', 'es-ES-AbrilNeural', 'es-ES-ArnauNeural', 'es-ES-DarioNeural', 'es-ES-EliasNeural', 'es-ES-EstrellaNeural', 'es-ES-IreneNeural', 'es-ES-LaiaNeural', 'es-ES-LiaNeural', 'es-ES-NilNeural', 'es-ES-SaulNeural', 'es-ES-TeoNeural', 'es-ES-TrianaNeural', 'es-ES-VeraNeural', 'es-ES-XimenaNeural'], + 'es-GQ': ['es-GQ-TeresaNeural', 'es-GQ-JavierNeural'], + 'es-GT': ['es-GT-MartaNeural', 'es-GT-AndresNeural'], + 'es-HN': ['es-HN-KarlaNeural', 'es-HN-CarlosNeural'], + 'es-MX': ['es-MX-DaliaNeural', 'es-MX-JorgeNeural', 'es-MX-BeatrizNeural', 'es-MX-CandelaNeural', 'es-MX-CarlotaNeural', 'es-MX-CecilioNeural', 'es-MX-GerardoNeural', 'es-MX-LarissaNeural', 'es-MX-LibertoNeural', 'es-MX-LucianoNeural', 'es-MX-MarinaNeural', 'es-MX-NuriaNeural', 'es-MX-PelayoNeural', 'es-MX-RenataNeural', 'es-MX-YagoNeural'], + 'es-NI': ['es-NI-YolandaNeural', 'es-NI-FedericoNeural'], + 'es-PA': ['es-PA-MargaritaNeural', 'es-PA-RobertoNeural'], + 'es-PE': ['es-PE-CamilaNeural', 'es-PE-AlexNeural'], + 'es-PR': ['es-PR-KarinaNeural', 'es-PR-VictorNeural'], + 'es-PY': ['es-PY-TaniaNeural', 'es-PY-MarioNeural'], + 'es-SV': ['es-SV-LorenaNeural', 'es-SV-RodrigoNeural'], + 'es-US': ['es-US-PalomaNeural', 'es-US-AlonsoNeural'], + 'es-UY': ['es-UY-ValentinaNeural', 'es-UY-MateoNeural'], + 'es-VE': ['es-VE-PaolaNeural', 'es-VE-SebastianNeural'], + 'et-EE': ['et-EE-AnuNeural', 'et-EE-KertNeural'], + 'eu-ES': ['eu-ES-AinhoaNeural', 'eu-ES-AnderNeural'], + 'fa-IR': ['fa-IR-DilaraNeural', 'fa-IR-FaridNeural'], + 'fi-FI': ['fi-FI-SelmaNeural', 'fi-FI-HarriNeural', 'fi-FI-NooraNeural'], + 'fil-PH': ['fil-PH-BlessicaNeural', 'fil-PH-AngeloNeural'], + 'fr-BE': ['fr-BE-CharlineNeural', 'fr-BE-GerardNeural'], + 'fr-CA': ['fr-CA-SylvieNeural', 'fr-CA-JeanNeural', 'fr-CA-AntoineNeural', 'fr-CA-ThierryNeural'], + 'fr-CH': ['fr-CH-ArianeNeural', 'fr-CH-FabriceNeural'], + 'fr-FR': ['fr-FR-DeniseNeural', 'fr-FR-HenriNeural', 'fr-FR-AlainNeural', 'fr-FR-BrigitteNeural', 'fr-FR-CelesteNeural', 'fr-FR-ClaudeNeural', 'fr-FR-CoralieNeural', 'fr-FR-EloiseNeural', 'fr-FR-JacquelineNeural', 'fr-FR-JeromeNeural', 'fr-FR-JosephineNeural', 'fr-FR-MauriceNeural', 'fr-FR-RemyMultilingualNeural', 'fr-FR-VivienneMultilingualNeural', 'fr-FR-YvesNeural', 'fr-FR-YvetteNeural'], + 'ga-IE': ['ga-IE-OrlaNeural', 'ga-IE-ColmNeural'], + 'gl-ES': ['gl-ES-SabelaNeural', 'gl-ES-RoiNeural'], + 'gu-IN': ['gu-IN-DhwaniNeural', 'gu-IN-NiranjanNeural'], + 'he-IL': ['he-IL-HilaNeural', 'he-IL-AvriNeural'], + 'hi-IN': ['hi-IN-SwaraNeural', 'hi-IN-MadhurNeural'], + 'hr-HR': ['hr-HR-GabrijelaNeural', 'hr-HR-SreckoNeural'], + 'hu-HU': ['hu-HU-NoemiNeural', 'hu-HU-TamasNeural'], + 'hy-AM': ['hy-AM-AnahitNeural', 'hy-AM-HaykNeural'], + 'id-ID': ['id-ID-GadisNeural', 'id-ID-ArdiNeural'], + 'is-IS': ['is-IS-GudrunNeural', 'is-IS-GunnarNeural'], + 'it-IT': ['it-IT-ElsaNeural', 'it-IT-IsabellaNeural', 'it-IT-DiegoNeural', 'it-IT-BenignoNeural', 'it-IT-CalimeroNeural', 'it-IT-CataldoNeural', 'it-IT-FabiolaNeural', 'it-IT-FiammaNeural', 'it-IT-GianniNeural', 'it-IT-GiuseppeNeural', 'it-IT-ImeldaNeural', 'it-IT-IrmaNeural', 'it-IT-LisandroNeural', 'it-IT-PalmiraNeural', 'it-IT-PierinaNeural', 'it-IT-RinaldoNeural'], + 'ja-JP': ['ja-JP-NanamiNeural', 'ja-JP-KeitaNeural', 'ja-JP-AoiNeural', 'ja-JP-DaichiNeural', 'ja-JP-MayuNeural', 'ja-JP-NaokiNeural', 'ja-JP-ShioriNeural'], + 'jv-ID': ['jv-ID-SitiNeural', 'jv-ID-DimasNeural'], + 'ka-GE': ['ka-GE-EkaNeural', 'ka-GE-GiorgiNeural'], + 'kk-KZ': ['kk-KZ-AigulNeural', 'kk-KZ-DauletNeural'], + 'km-KH': ['km-KH-SreymomNeural', 'km-KH-PisethNeural'], + 'kn-IN': ['kn-IN-SapnaNeural', 'kn-IN-GaganNeural'], + 'ko-KR': ['ko-KR-SunHiNeural', 'ko-KR-InJoonNeural', 'ko-KR-BongJinNeural', 'ko-KR-GookMinNeural', 'ko-KR-HyunsuNeural', 'ko-KR-JiMinNeural', 'ko-KR-SeoHyeonNeural', 'ko-KR-SoonBokNeural', 'ko-KR-YuJinNeural'], + 'lo-LA': ['lo-LA-KeomanyNeural', 'lo-LA-ChanthavongNeural'], + 'lt-LT': ['lt-LT-OnaNeural', 'lt-LT-LeonasNeural'], + 'lv-LV': ['lv-LV-EveritaNeural', 'lv-LV-NilsNeural'], + 'mk-MK': ['mk-MK-MarijaNeural', 'mk-MK-AleksandarNeural'], + 'ml-IN': ['ml-IN-SobhanaNeural', 'ml-IN-MidhunNeural'], + 'mn-MN': ['mn-MN-YesuiNeural', 'mn-MN-BataaNeural'], + 'mr-IN': ['mr-IN-AarohiNeural', 'mr-IN-ManoharNeural'], + 'ms-MY': ['ms-MY-YasminNeural', 'ms-MY-OsmanNeural'], + 'mt-MT': ['mt-MT-GraceNeural', 'mt-MT-JosephNeural'], + 'my-MM': ['my-MM-NilarNeural', 'my-MM-ThihaNeural'], + 'nb-NO': ['nb-NO-PernilleNeural', 'nb-NO-FinnNeural', 'nb-NO-IselinNeural'], + 'ne-NP': ['ne-NP-HemkalaNeural', 'ne-NP-SagarNeural'], + 'nl-BE': ['nl-BE-DenaNeural', 'nl-BE-ArnaudNeural'], + 'nl-NL': ['nl-NL-FennaNeural', 'nl-NL-MaartenNeural', 'nl-NL-ColetteNeural'], + 'pl-PL': ['pl-PL-AgnieszkaNeural', 'pl-PL-MarekNeural', 'pl-PL-ZofiaNeural'], + 'ps-AF': ['ps-AF-LatifaNeural', 'ps-AF-GulNawazNeural'], + 'pt-BR': ['pt-BR-FranciscaNeural', 'pt-BR-AntonioNeural', 'pt-BR-BrendaNeural', 'pt-BR-DonatoNeural', 'pt-BR-ElzaNeural', 'pt-BR-FabioNeural', 'pt-BR-GiovannaNeural', 'pt-BR-HumbertoNeural', 'pt-BR-JulioNeural', 'pt-BR-LeilaNeural', 'pt-BR-LeticiaNeural', 'pt-BR-ManuelaNeural', 'pt-BR-NicolauNeural', 'pt-BR-ThalitaNeural', 'pt-BR-ValerioNeural', 'pt-BR-YaraNeural'], + 'pt-PT': ['pt-PT-RaquelNeural', 'pt-PT-DuarteNeural', 'pt-PT-FernandaNeural'], + 'ro-RO': ['ro-RO-AlinaNeural', 'ro-RO-EmilNeural'], + 'ru-RU': ['ru-RU-SvetlanaNeural', 'ru-RU-DmitryNeural', 'ru-RU-DariyaNeural'], + 'si-LK': ['si-LK-ThiliniNeural', 'si-LK-SameeraNeural'], + 'sk-SK': ['sk-SK-ViktoriaNeural', 'sk-SK-LukasNeural'], + 'sl-SI': ['sl-SI-PetraNeural', 'sl-SI-RokNeural'], + 'so-SO': ['so-SO-UbaxNeural', 'so-SO-MuuseNeural'], + 'sq-AL': ['sq-AL-AnilaNeural', 'sq-AL-IlirNeural'], + 'sr-Latn-RS': ['sr-Latn-RS-NicholasNeural', 'sr-Latn-RS-SophieNeural'], + 'sr-RS': ['sr-RS-SophieNeural', 'sr-RS-NicholasNeural'], + 'su-ID': ['su-ID-TutiNeural', 'su-ID-JajangNeural'], + 'sv-SE': ['sv-SE-SofieNeural', 'sv-SE-MattiasNeural', 'sv-SE-HilleviNeural'], + 'sw-KE': ['sw-KE-ZuriNeural', 'sw-KE-RafikiNeural'], + 'sw-TZ': ['sw-TZ-RehemaNeural', 'sw-TZ-DaudiNeural'], + 'ta-IN': ['ta-IN-PallaviNeural', 'ta-IN-ValluvarNeural'], + 'ta-LK': ['ta-LK-SaranyaNeural', 'ta-LK-KumarNeural'], + 'ta-MY': ['ta-MY-KaniNeural', 'ta-MY-SuryaNeural'], + 'ta-SG': ['ta-SG-VenbaNeural', 'ta-SG-AnbuNeural'], + 'te-IN': ['te-IN-ShrutiNeural', 'te-IN-MohanNeural'], + 'th-TH': ['th-TH-PremwadeeNeural', 'th-TH-NiwatNeural', 'th-TH-AcharaNeural'], + 'tr-TR': ['tr-TR-EmelNeural', 'tr-TR-AhmetNeural'], + 'uk-UA': ['uk-UA-PolinaNeural', 'uk-UA-OstapNeural'], + 'ur-IN': ['ur-IN-GulNeural', 'ur-IN-SalmanNeural'], + 'ur-PK': ['ur-PK-UzmaNeural', 'ur-PK-AsadNeural'], + 'uz-UZ': ['uz-UZ-MadinaNeural', 'uz-UZ-SardorNeural'], + 'vi-VN': ['vi-VN-HoaiMyNeural', 'vi-VN-NamMinhNeural'], + 'wuu-CN': ['wuu-CN-XiaotongNeural', 'wuu-CN-YunzheNeural'], + 'yue-CN': ['yue-CN-XiaoMinNeural', 'yue-CN-YunSongNeural'], + 'zh-CN': ['zh-CN-XiaoxiaoNeural', 'zh-CN-YunxiNeural', 'zh-CN-YunjianNeural', 'zh-CN-XiaoyiNeural', 'zh-CN-YunyangNeural', 'zh-CN-XiaochenNeural', 'zh-CN-XiaohanNeural', 'zh-CN-XiaomengNeural', 'zh-CN-XiaomoNeural', 'zh-CN-XiaoqiuNeural', 'zh-CN-XiaoruiNeural', 'zh-CN-XiaoshuangNeural', 'zh-CN-XiaoxiaoMultilingualNeural', 'zh-CN-XiaoyanNeural', 'zh-CN-XiaoyouNeural', 'zh-CN-XiaozhenNeural', 'zh-CN-YunfengNeural', 'zh-CN-YunhaoNeural', 'zh-CN-YunxiaNeural', 'zh-CN-YunyeNeural', 'zh-CN-YunzeNeural', 'zh-CN-XiaoxuanNeural'], + 'zh-CN-henan': ['zh-CN-henan-YundengNeural'], + 'zh-CN-liaoning': ['zh-CN-liaoning-XiaobeiNeural'], + 'zh-CN-shaanxi': ['zh-CN-shaanxi-XiaoniNeural'], + 'zh-CN-shandong': ['zh-CN-shandong-YunxiangNeural'], + 'zh-CN-sichuan': ['zh-CN-sichuan-YunxiNeural'], + 'zh-HK': ['zh-HK-HiuMaanNeural', 'zh-HK-WanLungNeural', 'zh-HK-HiuGaaiNeural'], + 'zh-TW': ['zh-TW-HsiaoChenNeural', 'zh-TW-YunJheNeural', 'zh-TW-HsiaoYuNeural'], + 'zu-ZA': ['zu-ZA-ThandoNeural', 'zu-ZA-ThembaNeural'], +} + +#区域字典,键为区域代码,值为显示字符串 +azure_regions = { + 'southafricanorth': 'South Africa North', + 'eastasia': 'East Asia', + 'southeastasia': 'Southeast Asia', + 'australiaeast': 'Australia East', + 'centralindia': 'Central India', + 'japaneast': 'Japan East', + 'japanwest': 'Japan West', + 'koreacentral': 'Korea Central', + 'canadacentral': 'Canada Central', + 'northeurope': 'North Europe', + 'westeurope': 'West Europe', + 'francecentral': 'France Central', + 'germanywestcentral': 'Germany West Central', + 'norwayeast': 'Norway East', + 'swedencentral': 'Sweden Central', + 'switzerlandnorth': 'Switzerland North', + 'switzerlandwest': 'Switzerland West', + 'uksouth': 'UK South', + 'uaenorth': 'UAE North', + 'brazilsouth': 'Brazil South', + 'qatarcentral': 'Qatar Central', + 'centralus': 'Central US', + 'eastus': 'East US', + 'eastus2': 'East US 2', + 'northcentralus': 'North Central US', + 'southcentralus': 'South Central US', + 'westcentralus': 'West Central US', + 'westus': 'West US', + 'westus2': 'West US 2', + 'westus3': 'West US 3', +} + +class AzureTTS(TTSBase): + name = 'AzureTTS' + alias = 'Microsoft Azure Text to Speech' + need_api_key = True + api_key_hint = 'subscription key' + default_api_host = 'https://{region}.tts.speech.microsoft.com/cognitiveservices/' + default_api_host2 = 'https://{region}.tts.speech.azure.us/cognitiveservices/' + default_timeout = 60 + #https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits + request_interval = 3 #20 transactions per 60 seconds + #每段音频不能超过10分钟,所以对于中文,大约2000字,因为大约1500 word + max_len_per_request = 1000 + languages = azuretts_languages + regions = azure_regions + region_url = 'https://learn.microsoft.com/en-us/azure/ai-services/speech-service/regions' + voice_url = 'https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts' + language_url = 'https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts' + + def __init__(self, params): + super().__init__(params) + if self.region in ('usgovarizona', 'usgovvirginia'): + self.mainUrl = self.default_api_host2.format(region=self.region) + else: + self.mainUrl = self.default_api_host.format(region=self.region) + if not self.mainUrl.endswith('/'): + self.mainUrl += '/' + self.opener = UrlOpener(timeout=self.timeout, headers={'Ocp-Apim-Subscription-Key': self.key}) + + #获取支持的语音列表,注意,这个会返回一个超级大的json对象 + #或者可以直接到网页去查询 + #https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts + def voice_list(self): + url = urljoin(self.mainUrl, 'voices/list') + resp = self.opener.open(url) + if resp.status_code == 200: + return resp.json() + else: + return {'status': UrlOpener.CodeMap(resp.status_code)} + + #文本转换为语音, + #支持的音频格式参考: + # + def tts(self, text): + url = urljoin(self.mainUrl, 'v1') + headers = {'Content-Type': 'application/ssml+xml', + 'X-Microsoft-OutputFormat': 'audio-24khz-48kbitrate-mono-mp3', + 'User-Agent': 'KindleEar', + } + resp = self.opener.open(url, headers=headers, data=self.ssml(text)) + if resp.status_code == 200: + #返回的是stream流形式 + content = b''.join(line for line in resp.iter_content(chunk_size=None)) + return ('audio/mpeg', content) + else: + raise Exception(self.opener.CodeMap(resp.status_code)) + + + diff --git a/application/lib/ebook_tts/engines/google.py b/application/lib/ebook_tts/engines/google.py index 92f47a1b..47e6c007 100644 --- a/application/lib/ebook_tts/engines/google.py +++ b/application/lib/ebook_tts/engines/google.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- import io +from functools import partial try: import gtts @@ -12,139 +13,200 @@ except ImportError: texttospeech = None +from .tts_base import TTSBase + +#键为BCP-47语种代码,值为语音名字列表,因为gtts不支持语音选择,所以列表为空 gtts_languages = { - "af": "Afrikaans", - "ar": "Arabic", - "bg": "Bulgarian", - "bn": "Bengali", - "bs": "Bosnian", - "ca": "Catalan", - "cs": "Czech", - "da": "Danish", - "de": "German", - "el": "Greek", - "en": "English", - "es": "Spanish", - "et": "Estonian", - "fi": "Finnish", - "fr": "French", - "gu": "Gujarati", - "hi": "Hindi", - "hr": "Croatian", - "hu": "Hungarian", - "id": "Indonesian", - "is": "Icelandic", - "it": "Italian", - "iw": "Hebrew", - "ja": "Japanese", - "jw": "Javanese", - "km": "Khmer", - "kn": "Kannada", - "ko": "Korean", - "la": "Latin", - "lv": "Latvian", - "ml": "Malayalam", - "mr": "Marathi", - "ms": "Malay", - "my": "Myanmar (Burmese)", - "ne": "Nepali", - "nl": "Dutch", - "no": "Norwegian", - "pl": "Polish", - "pt": "Portuguese", - "ro": "Romanian", - "ru": "Russian", - "si": "Sinhala", - "sk": "Slovak", - "sq": "Albanian", - "sr": "Serbian", - "su": "Sundanese", - "sv": "Swedish", - "sw": "Swahili", - "ta": "Tamil", - "te": "Telugu", - "th": "Thai", - "tl": "Filipino", - "tr": "Turkish", - "uk": "Ukrainian", - "ur": "Urdu", - "vi": "Vietnamese", - "zh-CN": "Chinese (Simplified)", - "zh-TW": "Chinese (Mandarin/Taiwan)", - "zh": "Chinese (Mandarin)" + 'af': [], + 'ar': [], + 'bg': [], + 'bn': [], + 'bs': [], + 'ca': [], + 'cs': [], + 'da': [], + 'de': [], + 'el': [], + 'en': [], + 'es': [], + 'et': [], + 'fi': [], + 'fr': [], + 'gu': [], + 'hi': [], + 'hr': [], + 'hu': [], + 'id': [], + 'is': [], + 'it': [], + 'iw': [], + 'ja': [], + 'jw': [], + 'km': [], + 'kn': [], + 'ko': [], + 'la': [], + 'lv': [], + 'ml': [], + 'mr': [], + 'ms': [], + 'my': [], + 'ne': [], + 'nl': [], + 'no': [], + 'pl': [], + 'pt': [], + 'ro': [], + 'ru': [], + 'si': [], + 'sk': [], + 'sq': [], + 'sr': [], + 'su': [], + 'sv': [], + 'sw': [], + 'ta': [], + 'te': [], + 'th': [], + 'tl': [], + 'tr': [], + 'uk': [], + 'ur': [], + 'vi': [], + 'zh-CN': [], + 'zh-TW': [], + 'zh': [], } -class GoogleWebTTSFree: +#键为BCP-47语种代码,值为语音名字列表 +googletts_languages = { + 'af-ZA': ['af-ZA-Standard-A'], + 'am-ET': ['am-ET-Standard-A', 'am-ET-Standard-B', 'am-ET-Wavenet-A', 'am-ET-Wavenet-B'], + 'ar-XA': ['ar-XA-Standard-A', 'ar-XA-Standard-B', 'ar-XA-Standard-C', 'ar-XA-Standard-D', 'ar-XA-Wavenet-A', 'ar-XA-Wavenet-B', 'ar-XA-Wavenet-C', 'ar-XA-Wavenet-D'], + 'bg-BG': ['bg-BG-Standard-A'], + 'bn-IN': ['bn-IN-Standard-A', 'bn-IN-Standard-B', 'bn-IN-Standard-C', 'bn-IN-Standard-D', 'bn-IN-Wavenet-A', 'bn-IN-Wavenet-B', 'bn-IN-Wavenet-C', 'bn-IN-Wavenet-D'], + 'ca-ES': ['ca-ES-Standard-A'], + 'cmn-CN': ['cmn-CN-Standard-A', 'cmn-CN-Standard-B', 'cmn-CN-Standard-C', 'cmn-CN-Standard-D', 'cmn-CN-Wavenet-A', 'cmn-CN-Wavenet-B', 'cmn-CN-Wavenet-C', 'cmn-CN-Wavenet-D'], + 'cmn-TW': ['cmn-TW-Standard-A', 'cmn-TW-Standard-B', 'cmn-TW-Standard-C', 'cmn-TW-Wavenet-A', 'cmn-TW-Wavenet-B', 'cmn-TW-Wavenet-C'], + 'cs-CZ': ['cs-CZ-Standard-A', 'cs-CZ-Wavenet-A'], + 'da-DK': ['da-DK-Neural2-D', 'da-DK-Standard-A', 'da-DK-Standard-C', 'da-DK-Standard-D', 'da-DK-Standard-E', 'da-DK-Wavenet-A', 'da-DK-Wavenet-C', 'da-DK-Wavenet-D', 'da-DK-Wavenet-E'], + 'de-DE': ['de-DE-Neural2-A', 'de-DE-Neural2-B', 'de-DE-Neural2-C', 'de-DE-Neural2-D', 'de-DE-Neural2-F', 'de-DE-Polyglot-1', 'de-DE-Standard-A', 'de-DE-Standard-B', 'de-DE-Standard-C', 'de-DE-Standard-D', 'de-DE-Standard-E', 'de-DE-Standard-F', 'de-DE-Studio-B', 'de-DE-Studio-C', 'de-DE-Wavenet-A', 'de-DE-Wavenet-B', 'de-DE-Wavenet-C', 'de-DE-Wavenet-D', 'de-DE-Wavenet-E', 'de-DE-Wavenet-F'], + 'el-GR': ['el-GR-Standard-A', 'el-GR-Wavenet-A'], + 'en-AU': ['en-AU-Neural2-A', 'en-AU-Neural2-B', 'en-AU-Neural2-C', 'en-AU-Neural2-D', 'en-AU-News-E', 'en-AU-News-F', 'en-AU-News-G', 'en-AU-Polyglot-1', 'en-AU-Standard-A', 'en-AU-Standard-B', 'en-AU-Standard-C', 'en-AU-Standard-D', 'en-AU-Wavenet-A', 'en-AU-Wavenet-B', 'en-AU-Wavenet-C', 'en-AU-Wavenet-D'], + 'en-GB': ['en-GB-Neural2-A', 'en-GB-Neural2-B', 'en-GB-Neural2-C', 'en-GB-Neural2-D', 'en-GB-Neural2-F', 'en-GB-News-G', 'en-GB-News-H', 'en-GB-News-I', 'en-GB-News-J', 'en-GB-News-K', 'en-GB-News-L', 'en-GB-News-M', 'en-GB-Standard-A', 'en-GB-Standard-B', 'en-GB-Standard-C', 'en-GB-Standard-D', 'en-GB-Standard-F', 'en-GB-Studio-B', 'en-GB-Studio-C', 'en-GB-Wavenet-A', 'en-GB-Wavenet-B', 'en-GB-Wavenet-C', 'en-GB-Wavenet-D', 'en-GB-Wavenet-F'], + 'en-IN': ['en-IN-Neural2-A', 'en-IN-Neural2-B', 'en-IN-Neural2-C', 'en-IN-Neural2-D', 'en-IN-Standard-A', 'en-IN-Standard-B', 'en-IN-Standard-C', 'en-IN-Standard-D', 'en-IN-Wavenet-A', 'en-IN-Wavenet-B', 'en-IN-Wavenet-C', 'en-IN-Wavenet-D'], + 'en-US': ['en-US-Casual-K', 'en-US-Journey-D', 'en-US-Journey-F', 'en-US-Neural2-A', 'en-US-Neural2-C', 'en-US-Neural2-D', 'en-US-Neural2-E', 'en-US-Neural2-F', 'en-US-Neural2-G', 'en-US-Neural2-H', 'en-US-Neural2-I', 'en-US-Neural2-J', 'en-US-News-K', 'en-US-News-L', 'en-US-News-N', 'en-US-Polyglot-1', 'en-US-Standard-A', 'en-US-Standard-B', 'en-US-Standard-C', 'en-US-Standard-D', 'en-US-Standard-E', 'en-US-Standard-F', 'en-US-Standard-G', 'en-US-Standard-H', 'en-US-Standard-I', 'en-US-Standard-J', 'en-US-Studio-O', 'en-US-Studio-Q', 'en-US-Wavenet-A', 'en-US-Wavenet-B', 'en-US-Wavenet-C', 'en-US-Wavenet-D', 'en-US-Wavenet-E', 'en-US-Wavenet-F', 'en-US-Wavenet-G', 'en-US-Wavenet-H', 'en-US-Wavenet-I', 'en-US-Wavenet-J'], + 'es-ES': ['es-ES-Neural2-A', 'es-ES-Neural2-B', 'es-ES-Neural2-C', 'es-ES-Neural2-D', 'es-ES-Neural2-E', 'es-ES-Neural2-F', 'es-ES-Polyglot-1', 'es-ES-Standard-A', 'es-ES-Standard-B', 'es-ES-Standard-C', 'es-ES-Standard-D', 'es-ES-Studio-C', 'es-ES-Studio-F', 'es-ES-Wavenet-B', 'es-ES-Wavenet-C', 'es-ES-Wavenet-D'], + 'es-US': ['es-US-Neural2-A', 'es-US-Neural2-B', 'es-US-Neural2-C', 'es-US-News-D', 'es-US-News-E', 'es-US-News-F', 'es-US-News-G', 'es-US-Polyglot-1', 'es-US-Standard-A', 'es-US-Standard-B', 'es-US-Standard-C', 'es-US-Studio-B', 'es-US-Wavenet-A', 'es-US-Wavenet-B', 'es-US-Wavenet-C'], + 'eu-ES': ['eu-ES-Standard-A'], + 'fi-FI': ['fi-FI-Standard-A', 'fi-FI-Wavenet-A'], + 'fil-PH': ['fil-PH-Standard-A', 'fil-PH-Standard-B', 'fil-PH-Standard-C', 'fil-PH-Standard-D', 'fil-PH-Wavenet-A', 'fil-PH-Wavenet-B', 'fil-PH-Wavenet-C', 'fil-PH-Wavenet-D', 'fil-ph-Neural2-A', 'fil-ph-Neural2-D'], + 'fr-CA': ['fr-CA-Neural2-A', 'fr-CA-Neural2-B', 'fr-CA-Neural2-C', 'fr-CA-Neural2-D', 'fr-CA-Standard-A', 'fr-CA-Standard-B', 'fr-CA-Standard-C', 'fr-CA-Standard-D', 'fr-CA-Wavenet-A', 'fr-CA-Wavenet-B', 'fr-CA-Wavenet-C', 'fr-CA-Wavenet-D'], + 'fr-FR': ['fr-FR-Neural2-A', 'fr-FR-Neural2-B', 'fr-FR-Neural2-C', 'fr-FR-Neural2-D', 'fr-FR-Neural2-E', 'fr-FR-Polyglot-1', 'fr-FR-Standard-A', 'fr-FR-Standard-B', 'fr-FR-Standard-C', 'fr-FR-Standard-D', 'fr-FR-Standard-E', 'fr-FR-Studio-A', 'fr-FR-Studio-D', 'fr-FR-Wavenet-A', 'fr-FR-Wavenet-B', 'fr-FR-Wavenet-C', 'fr-FR-Wavenet-D', 'fr-FR-Wavenet-E'], + 'gl-ES': ['gl-ES-Standard-A'], + 'gu-IN': ['gu-IN-Standard-A', 'gu-IN-Standard-B', 'gu-IN-Standard-C', 'gu-IN-Standard-D', 'gu-IN-Wavenet-A', 'gu-IN-Wavenet-B', 'gu-IN-Wavenet-C', 'gu-IN-Wavenet-D'], + 'he-IL': ['he-IL-Standard-A', 'he-IL-Standard-B', 'he-IL-Standard-C', 'he-IL-Standard-D', 'he-IL-Wavenet-A', 'he-IL-Wavenet-B', 'he-IL-Wavenet-C', 'he-IL-Wavenet-D'], + 'hi-IN': ['hi-IN-Neural2-A', 'hi-IN-Neural2-B', 'hi-IN-Neural2-C', 'hi-IN-Neural2-D', 'hi-IN-Standard-A', 'hi-IN-Standard-B', 'hi-IN-Standard-C', 'hi-IN-Standard-D', 'hi-IN-Wavenet-A', 'hi-IN-Wavenet-B', 'hi-IN-Wavenet-C', 'hi-IN-Wavenet-D'], + 'hu-HU': ['hu-HU-Standard-A', 'hu-HU-Wavenet-A'], + 'id-ID': ['id-ID-Standard-A', 'id-ID-Standard-B', 'id-ID-Standard-C', 'id-ID-Standard-D', 'id-ID-Wavenet-A', 'id-ID-Wavenet-B', 'id-ID-Wavenet-C', 'id-ID-Wavenet-D'], + 'is-IS': ['is-IS-Standard-A'], + 'it-IT': ['it-IT-Neural2-A', 'it-IT-Neural2-C', 'it-IT-Standard-A', 'it-IT-Standard-B', 'it-IT-Standard-C', 'it-IT-Standard-D', 'it-IT-Wavenet-A', 'it-IT-Wavenet-B', 'it-IT-Wavenet-C', 'it-IT-Wavenet-D'], + 'ja-JP': ['ja-JP-Neural2-B', 'ja-JP-Neural2-C', 'ja-JP-Neural2-D', 'ja-JP-Standard-A', 'ja-JP-Standard-B', 'ja-JP-Standard-C', 'ja-JP-Standard-D', 'ja-JP-Wavenet-A', 'ja-JP-Wavenet-B', 'ja-JP-Wavenet-C', 'ja-JP-Wavenet-D'], + 'kn-IN': ['kn-IN-Standard-A', 'kn-IN-Standard-B', 'kn-IN-Standard-C', 'kn-IN-Standard-D', 'kn-IN-Wavenet-A', 'kn-IN-Wavenet-B', 'kn-IN-Wavenet-C', 'kn-IN-Wavenet-D'], + 'ko-KR': ['ko-KR-Neural2-A', 'ko-KR-Neural2-B', 'ko-KR-Neural2-C', 'ko-KR-Standard-A', 'ko-KR-Standard-B', 'ko-KR-Standard-C', 'ko-KR-Standard-D', 'ko-KR-Wavenet-A', 'ko-KR-Wavenet-B', 'ko-KR-Wavenet-C', 'ko-KR-Wavenet-D'], + 'lt-LT': ['lt-LT-Standard-A'], + 'lv-LV': ['lv-LV-Standard-A'], + 'ml-IN': ['ml-IN-Standard-A', 'ml-IN-Standard-B', 'ml-IN-Standard-C', 'ml-IN-Standard-D', 'ml-IN-Wavenet-A', 'ml-IN-Wavenet-B', 'ml-IN-Wavenet-C', 'ml-IN-Wavenet-D'], + 'mr-IN': ['mr-IN-Standard-A', 'mr-IN-Standard-B', 'mr-IN-Standard-C', 'mr-IN-Wavenet-A', 'mr-IN-Wavenet-B', 'mr-IN-Wavenet-C'], + 'ms-MY': ['ms-MY-Standard-A', 'ms-MY-Standard-B', 'ms-MY-Standard-C', 'ms-MY-Standard-D', 'ms-MY-Wavenet-A', 'ms-MY-Wavenet-B', 'ms-MY-Wavenet-C', 'ms-MY-Wavenet-D'], + 'nb-NO': ['nb-NO-Standard-A', 'nb-NO-Standard-B', 'nb-NO-Standard-C', 'nb-NO-Standard-D', 'nb-NO-Standard-E', 'nb-NO-Wavenet-A', 'nb-NO-Wavenet-B', 'nb-NO-Wavenet-C', 'nb-NO-Wavenet-D', 'nb-NO-Wavenet-E'], + 'nl-BE': ['nl-BE-Standard-A', 'nl-BE-Standard-B', 'nl-BE-Wavenet-A', 'nl-BE-Wavenet-B'], + 'nl-NL': ['nl-NL-Standard-A', 'nl-NL-Standard-B', 'nl-NL-Standard-C', 'nl-NL-Standard-D', 'nl-NL-Standard-E', 'nl-NL-Wavenet-A', 'nl-NL-Wavenet-B', 'nl-NL-Wavenet-C', 'nl-NL-Wavenet-D', 'nl-NL-Wavenet-E'], + 'pa-IN': ['pa-IN-Standard-A', 'pa-IN-Standard-B', 'pa-IN-Standard-C', 'pa-IN-Standard-D', 'pa-IN-Wavenet-A', 'pa-IN-Wavenet-B', 'pa-IN-Wavenet-C', 'pa-IN-Wavenet-D'], + 'pl-PL': ['pl-PL-Standard-A', 'pl-PL-Standard-B', 'pl-PL-Standard-C', 'pl-PL-Standard-D', 'pl-PL-Standard-E', 'pl-PL-Wavenet-A', 'pl-PL-Wavenet-B', 'pl-PL-Wavenet-C', 'pl-PL-Wavenet-D', 'pl-PL-Wavenet-E'], + 'pt-BR': ['pt-BR-Neural2-A', 'pt-BR-Neural2-B', 'pt-BR-Neural2-C', 'pt-BR-Standard-A', 'pt-BR-Standard-B', 'pt-BR-Standard-C', 'pt-BR-Studio-B', 'pt-BR-Studio-C', 'pt-BR-Wavenet-A', 'pt-BR-Wavenet-B', 'pt-BR-Wavenet-C'], + 'pt-PT': ['pt-PT-Standard-A', 'pt-PT-Standard-B', 'pt-PT-Standard-C', 'pt-PT-Standard-D', 'pt-PT-Wavenet-A', 'pt-PT-Wavenet-B', 'pt-PT-Wavenet-C', 'pt-PT-Wavenet-D'], + 'ro-RO': ['ro-RO-Standard-A', 'ro-RO-Wavenet-A'], + 'ru-RU': ['ru-RU-Standard-A', 'ru-RU-Standard-B', 'ru-RU-Standard-C', 'ru-RU-Standard-D', 'ru-RU-Standard-E', 'ru-RU-Wavenet-A', 'ru-RU-Wavenet-B', 'ru-RU-Wavenet-C', 'ru-RU-Wavenet-D', 'ru-RU-Wavenet-E'], + 'sk-SK': ['sk-SK-Standard-A', 'sk-SK-Wavenet-A'], + 'sr-RS': ['sr-RS-Standard-A'], + 'sv-SE': ['sv-SE-Standard-A', 'sv-SE-Standard-B', 'sv-SE-Standard-C', 'sv-SE-Standard-D', 'sv-SE-Standard-E', 'sv-SE-Wavenet-A', 'sv-SE-Wavenet-B', 'sv-SE-Wavenet-C', 'sv-SE-Wavenet-D', 'sv-SE-Wavenet-E'], + 'ta-IN': ['ta-IN-Standard-A', 'ta-IN-Standard-B', 'ta-IN-Standard-C', 'ta-IN-Standard-D', 'ta-IN-Wavenet-A', 'ta-IN-Wavenet-B', 'ta-IN-Wavenet-C', 'ta-IN-Wavenet-D'], + 'te-IN': ['te-IN-Standard-A', 'te-IN-Standard-B'], + 'th-TH': ['th-TH-Neural2-C', 'th-TH-Standard-A'], + 'tr-TR': ['tr-TR-Standard-A', 'tr-TR-Standard-B', 'tr-TR-Standard-C', 'tr-TR-Standard-D', 'tr-TR-Standard-E', 'tr-TR-Wavenet-A', 'tr-TR-Wavenet-B', 'tr-TR-Wavenet-C', 'tr-TR-Wavenet-D', 'tr-TR-Wavenet-E'], + 'uk-UA': ['uk-UA-Standard-A', 'uk-UA-Wavenet-A'], + 'vi-VN': ['vi-VN-Neural2-A', 'vi-VN-Neural2-D', 'vi-VN-Standard-A', 'vi-VN-Standard-B', 'vi-VN-Standard-C', 'vi-VN-Standard-D', 'vi-VN-Wavenet-A', 'vi-VN-Wavenet-B', 'vi-VN-Wavenet-C', 'vi-VN-Wavenet-D'], + 'yue-HK': ['yue-HK-Standard-A', 'yue-HK-Standard-B', 'yue-HK-Standard-C', 'yue-HK-Standard-D'], +} + +class GoogleWebTTSFree(TTSBase): name = 'GoogleWebTTS(Free)' alias = 'Google Web TTS (Free)' need_api_key = False api_key_hint = '' default_api_host = 'https://translate.google.com' default_timeout = 60 - request_interval = 10 - max_len_per_request = 500 + request_interval = 10 #额外的,好像每天只允许50个请求 + max_len_per_request = 1666 languages = gtts_languages def __init__(self, params): + super().__init__(params) params = params or {} self.params = params - host = params.get('api_host', 'google.com') + host = self.host or 'google.com' self.tld = host.split('google.')[-1] if 'google.' in host else 'com' - lang = params.get('language', 'en') - self.lang = lang if lang in self.languages else 'en' - self.timeout = self.params.get('timeout', self.default_timeout) - self.slow = (self.params.get('speed', 'normal') == 'slow') - + if self.language not in self.languages: + self.language = 'en' + slow = self.rate in ('slow', 'x-slow') + self.ttsFunc = partial(gtts.gTTS, tld=self.tld, lang=self.language, slow=slow, + lang_check=False, timeout=self.timeout) + #开始进行tts转换,返回 (mime,音频二进制) def tts(self, text): - tts = gtts.gTTS(text=text, tld=self.tld, lang=self.lang, slow=self.slow, - lang_check=False, timeout=self.timeout) buf = io.BytesIO() - tts.write_to_fp(buf) + self.ttsFunc(text=text).write_to_fp(buf) return ('audio/mpeg', buf.getvalue()) #https://cloud.google.com/text-to-speech/docs/create-audio#text-to-speech-text-python #https://cloud.google.com/text-to-speech/pricing -class GoogleTextToSpeech: +#需要先启用 'Cloud Text-to-Speech API' +#https://console.cloud.google.com/apis/api/texttospeech.googleapis.com/overview +class GoogleTextToSpeech(TTSBase): name = 'GoogleTextToSpeech(GAE only)' alias = 'Google Text To Speech (GAE only)' need_api_key = False api_key_hint = '' default_api_host = '' default_timeout = 60 - request_interval = 0.5 - max_len_per_request = 500 - languages = gtts_languages + #https://cloud.google.com/text-to-speech/quotas + request_interval = 2 + max_len_per_request = 1666 + languages = googletts_languages def __init__(self, params): - params = params or {} - self.params = params - lang = params.get('language', 'en') - self.lang = lang if lang in self.languages else 'en' - self.timeout = self.params.get('timeout', self.default_timeout) - self.slow = (self.params.get('speed', 'normal') == 'slow') + super().__init__(params) + if self.language not in self.languages: + self.language = 'en' self.client = texttospeech.TextToSpeechClient() + #Names of voices can be retrieved with client.list_voices(). + #omit ssml_gender=texttospeech.SsmlVoiceGender.FEMALE + self.voiceCfg = texttospeech.VoiceSelectionParams(language_code=self.language, name=self.voice) + self.audioCfg = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3) + self.reqDict = {"voice": self.voice, "audio_config": self.audioCfg} #开始进行tts转换,返回 (mime,音频二进制) #Limit is 5000 bytes per request #https://cloud.google.com/text-to-speech/quotas def tts(self, text): - client = texttospeech.TextToSpeechClient() - input_text = texttospeech.SynthesisInput(text=text) - - # Note: the voice can also be specified by name. - # Names of voices can be retrieved with client.list_voices(). - voice = texttospeech.VoiceSelectionParams( - language_code="en-US", - name="en-US-Standard-C", - ssml_gender=texttospeech.SsmlVoiceGender.FEMALE, - ) - - audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3) - - response = client.synthesize_speech( - request={"input": input_text, "voice": voice, "audio_config": audio_config} - ) - - return ('audio/mpeg', response.audio_content) + self.reqDict["ssml"] = self.ssml(text) + resp = self.client.synthesize_speech(request=self.reqDict) + return ('audio/mpeg', resp.audio_content) + #获取支持的语音列表,注意,这个会返回一个超级大的json对象 + #或者可以直接到网页去查询 + #https://cloud.google.com/text-to-speech/docs/voices + def voice_list(self): + voices = self.client.list_voices() + return voices diff --git a/application/lib/ebook_tts/engines/tts_base.py b/application/lib/ebook_tts/engines/tts_base.py new file mode 100644 index 00000000..480b7bc8 --- /dev/null +++ b/application/lib/ebook_tts/engines/tts_base.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# 调用在线文本转语音TTS基类 +from xml.etree import ElementTree as ET + +class TTSBase: + name = '' + alias = '' + need_api_key = False + api_key_hint = '' + default_api_host = '' + default_timeout = 60 + request_interval = 1 + max_len_per_request = 500 + languages = {} + regions = {} + region_url = '' #一个链接,可以在这个链接网页上找到可用的区域 + voice_url = '' #一个链接,可以在这个网页上找到语音名称列表 + language_url = '' #一个链接,可以在这个网页上找到支持的语种列表 + + #语音语调的允许常量值列表,除了使用常量值,也可以使用一个正负数值,比如 100%, +1.5, -30.00% 等 + prosody_attributes = { + 'rate': ('x-slow', 'slow', 'medium', 'fast', 'x-fast'), + 'pitch': ('x-low', 'low', 'medium', 'high', 'x-high'), + 'volume': ('silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud') + } + + def __init__(self, params): + params = params or {} + self.params = params + self.language = params.get('language', 'en-US') + self.voice = params.get('voice', '') + self.rate = params.get('rate', 'medium') + self.pitch = params.get('pitch', 'medium') + self.volume = params.get('volume', 'medium') + self.key = params.get('api_key', '') + self.host = params.get('api_host', TTSBase.default_api_host) + self.timeout = params.get('timeout', TTSBase.default_timeout) + self.region = params.get('region', '') + + #构建一个简单的ssml字符串,返回一个utf-8编码后的二进制字节串 + #text, language, voice: 要转换的文本,语种代码,语音名字 + #pitch: 音调, rate: 语速, volume: 音量 + def ssml(self, text): + root = ET.Element('speak', version='1.0', xmlns='http://www.w3.org/2001/10/synthesis') + root.set('xml:lang', self.language) + voiceNode = ET.SubElement(root, 'voice', name=self.voice) + prosody = ET.SubElement(voiceNode, 'prosody', pitch=self.pitch, rate=self.rate, volume=self.volume) + prosody.text = text #xml模块会自动转义非法字符串 + return ET.tostring(root, encoding="utf-8", method="xml", xml_declaration=False) diff --git a/application/lib/ebook_tts/html_audiolator.py b/application/lib/ebook_tts/html_audiolator.py index dc2c2edb..d82e2c67 100644 --- a/application/lib/ebook_tts/html_audiolator.py +++ b/application/lib/ebook_tts/html_audiolator.py @@ -11,7 +11,9 @@ def get_tts_engines(): for name, engine in builtin_tts_engines.items(): info[name] = {'alias': engine.alias, 'need_api_key': engine.need_api_key, 'default_api_host': engine.default_api_host, 'api_key_hint': engine.api_key_hint, - 'languages': engine.languages} + 'languages': engine.languages, 'region_url': engine.region_url, + 'voice_url': engine.voice_url, 'language_url': engine.language_url, + 'regions': engine.regions} return info class HtmlAudiolator: @@ -162,3 +164,5 @@ def split_strings(self, strings, max_len): result.append(item) return result + + \ No newline at end of file diff --git a/application/lib/ssml_builder.py b/application/lib/ssml_builder.py deleted file mode 100644 index 2bd3d5c2..00000000 --- a/application/lib/ssml_builder.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- -#https://github.com/Reverseblade/ssml-builder - -import re - - -class Speech: - - VALID_INTERPRET_AS = ('characters', 'spell-out', 'cardinal', 'number', - 'ordinal', 'digits', 'fraction', 'unit', 'date', - 'time', 'telephone', 'address', 'interjection', 'expletive') - - VALID_PROSODY_ATTRIBUTES = { - 'rate': ('x-slow', 'slow', 'medium', 'fast', 'x-fast'), - 'pitch': ('x-low', 'low', 'medium', 'high', 'x-high'), - 'volume': ('silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud') - } - - VALID_VOICE_NAMES = ('Ivy', 'Joanna', 'Joey', 'Justin', 'Kendra', 'Kimberly', - 'Matthew', 'Salli', 'Nicole', 'Russell', 'Amy', 'Brian', 'Emma', - 'Aditi', 'Raveena', 'Hans', 'Marlene', 'Vicki', 'Conchita', 'Enrique', - 'Carla', 'Giorgio', 'Mizuki', 'Takumi', 'Celine', 'Lea', 'Mathieu') - - VALID_EMPHASIS_LEVELS = ('strong', 'moderate', 'reduced') - - def __init__(self): - self.speech = "" - - def speak(self): - """ - - :return: - """ - return '{}'.format(self.speech) - - def add_text(self, value): - """ - add text - :return: - """ - self.speech += value - return self - - def say_as(self, value, interpret_as, is_nested=False): - """ - - :param value: - :param interpret_as: - :param is_nested: - :return: - """ - - if interpret_as not in self.VALID_INTERPRET_AS: - raise ValueError('The interpret-as provided to say_as is not valid') - - ssml = '' \ - '{value}'.format(interpret_as=interpret_as, value=value) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def prosody(self, value, rate='medium', pitch='medium', volume='medium', is_nested=False): - """ - - :param value: - :param rate: - :param pitch: - :param volume: - :param is_nested: - :return: - """ - - if rate not in self.VALID_PROSODY_ATTRIBUTES['rate']: - if re.match(r'^\d+%$', rate) is None: - raise ValueError('The rate provided to prosody is not valid') - - if pitch not in self.VALID_PROSODY_ATTRIBUTES['pitch']: - if re.match(r'^(\+|\-)+\d+(\.\d+)*%$', pitch) is None: - raise ValueError('The pitch provided to prosody is not valid') - - if volume not in self.VALID_PROSODY_ATTRIBUTES['volume']: - raise ValueError('The volume provided to prosody is not valid') - - ssml = '' \ - '{value}'.format(rate=rate, pitch=pitch, volume=volume, value=value) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def sub(self, value, alias, is_nested=False): - """ - - :param value: - :param alias: - :param is_nested: - :return: - """ - - ssml = '{}'.format(alias, value) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def lang(self, value, lang, is_nested=False): - """ - - :param value: - :param lang: - :param is_nested: - :return: - """ - - ssml = '{}'.format(lang, value) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def voice(self, value, name, is_nested=False): - """ - - :param value: - :param name: - :return: - """ - - #if name not in self.VALID_VOICE_NAMES: - # raise ValueError('The name provided to voice is not valid') - - ssml = '{}'.format(name, value) - - if is_nested: - return ssml - - self.speech += '{}'.format(name, value) - return self - - def pause(self, time, is_nested=False): - """ - - :param time: - :param is_nested: - :return: - """ - - ssml = ''.format(time) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def whisper(self, value, is_nested=False): - """ - :param value: - :param is_nested: - :return: - """ - - ssml = '{}'.format(value) - - if is_nested: - return ssml - - self.speech += ssml - return self - - def audio(self, src, is_nested=False): - """ - :param src: - :param is_nested: - :return: - """ - - ssml = '