From 2b1f0c7500be5316ca7d41df43427eefe11a4ce4 Mon Sep 17 00:00:00 2001 From: BingLingGroup <42505588+BingLingGroup@users.noreply.github.com> Date: Wed, 6 May 2020 16:31:00 +0800 Subject: [PATCH] Change (close #108) langcodes into the optional dependency --- CHANGELOG.md | 1 + README.md | 60 +++++++++++++++++++++++++++++++----- autosub/cmdline_utils.py | 15 ++++++--- autosub/constants.py | 5 +++ autosub/lang_code_utils.py | 37 ++++++++++++++-------- autosub/options.py | 3 +- docs/CHANGELOG.zh-Hans.md | 1 + docs/README.zh-Hans.md | 63 ++++++++++++++++++++++++++++++++------ requirements.txt | 3 +- setup.py | 5 +-- 10 files changed, 154 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a4afc23..1f9a78c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,6 +69,7 @@ Click up arrow to go back to TOC. - Change the default style selection in subtitles translation. - Change the loglevel in ffmpeg commands into `-loglevel error`. - Change DEFAULT_MIN_REGION_SIZE to 0.5. +- Change langcodes into the optional dependency. #### Fixed(Unreleased) diff --git a/README.md b/README.md index c81d753d..c9938443 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,8 @@ Color: [Solarized](https://en.wikipedia.org/wiki/Solarized_(color_scheme)#Colors 1. [Description](#description) 2. [License](#license) 3. [Dependencies](#dependencies) + - 3.1 [Optional Dependencies](#optional-dependencies) + - 3.2 [Required Dependencies](#required-dependencies) 4. [Download and Installation](#download-and-installation) - 4.1 [Branches](#branches) - 4.2 [Install on Ubuntu](#install-on-ubuntu) @@ -77,17 +79,31 @@ This repo has a different license from [the original repo](https://github.com/ag Autosub depends on these third party softwares or Python site-packages. Much appreciation to all of these projects. 
+#### Optional dependencies + - [ffmpeg](https://ffmpeg.org/) - [ffprobe](https://ffmpeg.org/ffprobe.html) -- [auditok](https://github.com/amsehili/auditok) +- [langcodes](https://github.com/LuminosoInsight/langcodes) +- [ffmpeg-normalize](https://github.com/slhck/ffmpeg-normalize) +- [python-Levenshtein](https://github.com/ztane/python-Levenshtein)(Used by [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)) + +For Windows users: + +- [Build Tools for Visual Studio 2019](https://visualstudio.microsoft.com/downloads/) + - Used by [marisa-trie](https://github.com/pytries/marisa-trie) when installing. + - [marisa-trie](https://github.com/pytries/marisa-trie) is a dependency of [langcodes](https://github.com/LuminosoInsight/langcodes). + - Components you will probably need to install: MSVC v14 VS 2019 C++ build tools, Windows 10 SDK. + +#### Required dependencies + +- [auditok 0.1.5](https://github.com/amsehili/auditok) - [pysubs2](https://github.com/tkarabela/pysubs2) - [wcwidth](https://github.com/jquast/wcwidth) - [requests](https://github.com/psf/requests) -- [langcodes](https://github.com/LuminosoInsight/langcodes) +- [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy) - [progressbar2](https://github.com/WoLpH/python-progressbar) - [websocket-client](https://github.com/websocket-client/websocket-client) - [py-googletrans](https://github.com/ssut/py-googletrans) -- [ffmpeg-normalize](https://github.com/slhck/ffmpeg-normalize) [requirements.txt](requirements.txt). @@ -99,9 +115,17 @@ About how to install these dependencies, see [Download and Installation](#downlo Except the PyPI version, others include non-original codes not from the original repository. -After autosub-0.4.0, all of the codes is compatible with both Python 2.7 and Python 3. It don't matter if you change the Python version in the installation commands below. +0.4.0 > autosub + +- These versions are only compatible with Python 2.7. + +0.5.6a >= autosub >= 0.4.0 -About the dependencies installation. 
If you install autosub by pip, ffmpeg and ffmpeg-normalize won't be installed together not like the Python site-packages already listed on the `setup.py` or `requirements.txt`. You need to install them separately. But of course they are optional. They aren't necessary if you only use autosub to translate your subtitles. +- These versions are compatible with both Python 2.7 and Python 3. It doesn't matter if you change the Python version in the installation commands below. + +autosub >= 0.5.7a + +- These versions are only compatible with Python 3. ffmpeg, ffprobe, ffmpeg-normalize need to be put on one of these places to let the autosub detect and use them. The following codes are in the [constants.py](autosub/constants.py). Priority is determined in order. @@ -147,6 +171,13 @@ apt install ffmpeg python python-pip git -y pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize ``` +Install from `dev` branch.(latest autosub dev version) + +```bash +apt install ffmpeg python python-pip git -y +pip install git+https://github.com/BingLingGroup/autosub.git@dev ffmpeg-normalize langcodes +``` + Install from `origin` branch.(autosub-0.4.0a) ```bash @@ -187,13 +218,24 @@ Choco installation command is for cmd.(not Powershell) @"%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe" -NoProfile -InputFormat None -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))" && SET "PATH=%PATH%;%ALLUSERSPROFILE%\chocolatey\bin" ``` +If you don't have visual studio + Install from `alpha` branch.(latest autosub alpha release) ```batch -choco install git python2 curl ffmpeg -y +choco install git python curl ffmpeg -y curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py python get-pip.py -pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize +pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize langcodes +``` + +Install from 
`dev` branch.(latest autosub dev version) + +```batch +choco install git python curl ffmpeg -y +curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +python get-pip.py +pip install git+https://github.com/BingLingGroup/autosub.git@dev ffmpeg-normalize langcodes ``` Install from `origin` branch.(autosub-0.4.0a) @@ -609,13 +651,15 @@ autosub -sapi baidu -i input_file -sconf baidu_speech_config ...(other options) Translate subtitles to another language. +If the `-SRC` option is not given, the translation source language will be auto-detected by py-googletrans. + Translate subtitles from an audio/video file. ``` autosub -i input_file -S lang_code (-SRC lang_code) -D lang_code ``` -Translate subtitles from a subtitles file. +Translate subtitles from a subtitles file. (The translation source language can be auto-detected by py-googletrans.) ``` autosub -i input_file -SRC lang_code -D lang_code diff --git a/autosub/cmdline_utils.py b/autosub/cmdline_utils.py index 3bc45127..1fd8ac26 100644 --- a/autosub/cmdline_utils.py +++ b/autosub/cmdline_utils.py @@ -16,7 +16,6 @@ # Import third-party modules import auditok import googletrans -import langcodes import pysubs2 # Any changes to the path and your own modules @@ -385,10 +384,18 @@ def validate_aovp_args(args): # pylint: disable=too-many-branches, too-many-ret match_list=list(constants.SPEECH_TO_TEXT_LANGUAGE_CODES.keys()), min_score=args.min_score) if best_result: - print(_("Use langcodes to standardize the result.")) - args.speech_language = langcodes.standardize_tag(best_result[0]) print(_("Use \"{lang_code}\" instead.").format( - lang_code=args.speech_language)) + lang_code=best_result[0])) + args.speech_language = best_result[0] + if constants.langcodes_: + print(_("Use langcodes to standardize the result.")) + args.speech_language = constants.langcodes_.standardize_tag( + best_result[0]) + print(_("Use \"{lang_code}\" instead.").format( + lang_code=args.speech_language)) + else: + print(_("Use the lower case.")) + args.speech_language 
= best_result[0] else: print(_("Match failed. Still using \"{lang_code}\".").format( lang_code=args.speech_language)) diff --git a/autosub/constants.py b/autosub/constants.py index 87aff84a..d9d0c644 100644 --- a/autosub/constants.py +++ b/autosub/constants.py @@ -18,6 +18,11 @@ except DistributionNotFound: IS_GOOGLECLOUDCLIENT = False +try: + import langcodes as langcodes_ # pylint: disable=unused-import +except ImportError: + langcodes_ = None + # Any changes to the path and your own modules SUPPORTED_LOCALE = { diff --git a/autosub/lang_code_utils.py b/autosub/lang_code_utils.py index 83e1e348..7baae63f 100644 --- a/autosub/lang_code_utils.py +++ b/autosub/lang_code_utils.py @@ -7,12 +7,16 @@ import gettext # Import third-party modules -import langcodes import wcwidth # Any changes to the path and your own modules from autosub import constants +if not constants.langcodes_: + from fuzzywuzzy import process +else: + process = None # pylint: disable=invalid-name + LANG_CODE_TEXT = gettext.translation(domain=__name__, localedir=constants.LOCALE_PATH, languages=[constants.CURRENT_LOCALE], @@ -52,18 +56,22 @@ def better_match(desired_language, match_scores = [] unsupported_languages = [] - for supported in supported_languages: - try: - score = langcodes.tag_match_score(desired_language, supported) - match_scores.append((supported, score)) - except langcodes.tag_parser.LanguageTagError: - unsupported_languages.append(supported) - continue - - match_scores = [ - (supported, score) for (supported, score) in match_scores - if score >= min_score - ] + if constants.langcodes_: + for supported in supported_languages: + try: + score = constants.langcodes_.tag_match_score(desired_language, supported) + if score >= min_score: + match_scores.append((supported, score)) + except constants.langcodes_.tag_parser.LanguageTagError: + unsupported_languages.append(supported) + continue + else: + match_scores = process.extract(query=desired_language, + choices=supported_languages) + 
match_scores = [ + (supported, score) for (supported, score) in match_scores + if score >= min_score + ] if not match_scores: match_scores.append(('und', 0)) @@ -104,6 +112,9 @@ def match_print( print(_("Now match lang codes.")) + if not constants.langcodes_: + print(_("Langcodes dependency not found. Use fuzzywuzzy instead.")) + if min_score < 0 or min_score > 100: print(_("The value of arg of \"-mns\"/\"--min-score\" isn't legal.")) return None diff --git a/autosub/options.py b/autosub/options.py index 708e0747..e8c08c0e 100644 --- a/autosub/options.py +++ b/autosub/options.py @@ -165,9 +165,10 @@ def get_cmd_parser(): # pylint: disable=too-many-statements '-bm', '--best-match', metavar=_('mode'), nargs="*", - help=_("Allow langcodes to get a best matching lang code " + help=_("Use langcodes to get a best matching lang code " "when your input is wrong. " "Only functional for py-googletrans and Google Speech API. " + "If langcodes not installed, use fuzzywuzzy instead. " "Available modes: " "s, src, d, all. " "\"s\" for \"-S\"/\"--speech-language\". " diff --git a/docs/CHANGELOG.zh-Hans.md b/docs/CHANGELOG.zh-Hans.md index aeaa65e9..0dc1caca 100644 --- a/docs/CHANGELOG.zh-Hans.md +++ b/docs/CHANGELOG.zh-Hans.md @@ -66,6 +66,7 @@ - 修改字幕翻译中字幕样式选择的默认方式。 - 修改ffmpeg指令中的loglevel为`-loglevel error`。 - 修改DEFAULT_MIN_REGION_SIZE为0.5。 +- 修改langcodes为可选依赖。 #### 修复(未发布) diff --git a/docs/README.zh-Hans.md b/docs/README.zh-Hans.md index 771e1c8e..820483f5 100644 --- a/docs/README.zh-Hans.md +++ b/docs/README.zh-Hans.md @@ -23,6 +23,8 @@ 1. [介绍](#介绍) 2. [证书](#证书) 3. [依赖](#依赖) + - 3.1 [可选依赖](#可选依赖) + - 3.2 [必需依赖](#必需依赖) 4. 
[下载与安装](#下载与安装) - 4.1 [分支](#分支) - 4.2 [在Ubuntu上安装](#在Ubuntu上安装) @@ -77,16 +79,31 @@ Autosub是一个字幕自动生成工具。它能使用Auditok来自动检测语 Autosub依赖于这些第三方的软件或者Python的site-packages。非常感谢以下这些项目的工作。 +#### 可选依赖 + - [ffmpeg](https://ffmpeg.org/) - [ffprobe](https://ffmpeg.org/ffprobe.html) -- [auditok](https://github.com/amsehili/auditok) +- [ffmpeg-normalize](https://github.com/slhck/ffmpeg-normalize) +- [langcodes](https://github.com/LuminosoInsight/langcodes) +- [python-Levenshtein](https://github.com/ztane/python-Levenshtein)([fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy)的可选依赖) + +对于windows用户: + +- [Visual Studio 2019 生成工具](https://visualstudio.microsoft.com/downloads/) + - [marisa-trie](https://github.com/pytries/marisa-trie)安装时会用到。 + - [marisa-trie](https://github.com/pytries/marisa-trie)是[langcodes](https://github.com/LuminosoInsight/langcodes))的依赖。 + - 大概需要安装以下两个组件:MSVC v14 VS 2019 C++生成工具, windows 10 SDK。 + +#### 必需依赖 + +- [auditok 0.1.5](https://github.com/amsehili/auditok) - [pysubs2](https://github.com/tkarabela/pysubs2) - [wcwidth](https://github.com/jquast/wcwidth) -- [langcodes](https://github.com/LuminosoInsight/langcodes) +- [requests](https://github.com/psf/requests) +- [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy) - [progressbar2](https://github.com/WoLpH/python-progressbar) - [websocket-client](https://github.com/websocket-client/websocket-client) - [py-googletrans](https://github.com/ssut/py-googletrans) -- [ffmpeg-normalize](https://github.com/slhck/ffmpeg-normalize) [requirements.txt](requirements.txt)。 @@ -98,9 +115,17 @@ Autosub依赖于这些第三方的软件或者Python的site-packages。非常感 除去PyPI版本的代码和原仓库的一致,其他的安装方式均包含非原仓库的代码。 -在autosub-0.4.0之后,所有的代码都是Python3和Python2.7兼容的。所以后面的安装指令中的Python版本你可以随便改。 +0.4.0 > autosub + +- 这些版本只与Python 2.7兼容。 + +0.5.6a >= autosub >= 0.4.0 -至于依赖的安装,如果你是通过pip来安装的autosub,那么ffmpeg和ffmpeg-normalize不会被一块儿安装,不像site-packages那样列在`setup.py`或者`requirements.txt`里面自动安装了。你需要分别安装它们。当然安装是可选的,如果你只是翻译字幕,不需要安装这两个软件。 +- 
这些版本与Python3和Python2.7兼容。所以后面的安装指令中的Python版本你可以随便改。 + +autosub >= 0.5.7a + +- 这些版本只与Python 3兼容。 ffmpeg, ffprobe, ffmpeg-normalize需要被放在以下位置之一来让autosub检测并使用。以下代码都在[constants.py](autosub/constants.py)里。优先级按照先后顺序确定。 @@ -137,13 +162,20 @@ pip install . #### 在Ubuntu上安装 -第一行包含依赖的安装。 +包含依赖的安装。 从`alpha`分支安装。(最新alpha发布版) ```bash apt install ffmpeg python python-pip git -y -pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize +pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize langcodes +``` + +从`dev`分支安装。(最新dev版) + +```bash +apt install ffmpeg python python-pip git -y +pip install git+https://github.com/BingLingGroup/autosub.git@dev ffmpeg-normalize langcodes ``` 从`origin`分支安装。(autosub-0.4.0a) @@ -189,12 +221,21 @@ pip install autosub 从`alpha`分支安装。(最新alpha发布版) ```batch -choco install git python2 curl ffmpeg -y +choco install git python curl ffmpeg -y curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py python get-pip.py pip install git+https://github.com/BingLingGroup/autosub.git@alpha ffmpeg-normalize ``` +从`dev`分支安装。(最新dev版) + +```batch +choco install git python curl ffmpeg -y +curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py +python get-pip.py +pip install git+https://github.com/BingLingGroup/autosub.git@dev ffmpeg-normalize langcodes +``` + 从`origin`分支安装。(autosub-0.4.0a) ```batch @@ -206,7 +247,7 @@ pip install git+https://github.com/BingLingGroup/autosub.git@origin PyPI的版本(autosub-0.3.12)不推荐在windows上使用,因为它无法成功运行。查看[origin分支的更新日志](CHANGELOG.zh-Hans.md#040-alpha---2019-02-17)来了解详情。 -推荐使用`python`而不是`python2`在autosub-0.4.0之后。 +在autosub-0.4.0之后,推荐使用`python`而不是`python2`。  ↑  @@ -610,6 +651,8 @@ autosub -sapi baidu -i 输入文件 -sconf 百度语音配置文件 ...(其他 将字幕翻译为别的语言。 +如果不输入选项`-SRC`,翻译源语言会被py-googletrans自动检测。 + 从音频/视频文件翻译字幕。 ``` @@ -619,7 +662,7 @@ autosub -i 输入文件 -S 语言代码 (-SRC 语言代码) -D 语言代码 从字幕文件翻译字幕。 ``` -autosub -i 输入文件 -SRC 语言代码 -D 语言代码 +autosub -i 输入文件 (-SRC 语言代码) -D 语言代码 ``` 
使用"translate.google.cn"翻译字幕,"translate.google.cn"可被某地直连。 diff --git a/requirements.txt b/requirements.txt index a6711458..b6eb6fc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,8 @@ pysubs2>=0.2.4 progressbar2>=3.34.3 auditok==0.1.5 googletrans>=2.4.0 -langcodes>=1.2.0 wcwidth>=0.1.7 google-cloud-speech>=1.3.1 websocket-client>=0.56.0 +fuzzywuzzy>=0.18.0 +python-Levenshtein>=0.12.0 diff --git a/setup.py b/setup.py index 1b4b03fe..f0dbaef1 100644 --- a/setup.py +++ b/setup.py @@ -36,10 +36,11 @@ 'progressbar2>=3.34.3', 'auditok==0.1.5', 'googletrans>=2.4.0', - 'langcodes>=1.2.0', 'wcwidth>=0.1.7', + 'fuzzywuzzy>=0.18.0', 'google-cloud-speech>=1.3.1', - 'websocket-client>=0.56.0' + 'websocket-client>=0.56.0', + 'python-Levenshtein>=0.12.0' ], license=open(os.path.join(here, "LICENSE")).read() )