diff --git a/changelog.d/1175.change.rst b/changelog.d/1175.change.rst new file mode 100644 index 00000000..490e8430 --- /dev/null +++ b/changelog.d/1175.change.rst @@ -0,0 +1 @@ +Add cli option to prefer or disfavor hearing impaired (-hi/-HI) or foreign only (-fo/-FO) subtitles. diff --git a/docs/config.toml b/docs/config.toml index c8816c68..34c1908e 100644 --- a/docs/config.toml +++ b/docs/config.toml @@ -22,6 +22,7 @@ provider = ["addic7ed", "opensubtitlescom", "opensubtitles"] refiner = ["metadata", "hash", "omdb"] ignore_refiner = ["tmdb"] language = ["fr", "en", "pt-br"] +foreign_only = false encoding = "utf-8" min_score = 50 archives = true diff --git a/pyproject.toml b/pyproject.toml index aec3cea6..7a51e4c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -321,6 +321,8 @@ extend-ignore-re = [ "(?Rm)^.*#\\s*spellchecker:\\s*disable-line$", "#\\s*spellchecker:off\\s*\\n.*\\n\\s*#\\s*spellchecker:on" ] +[tool.typos.default.extend-words] +fo = "fo" [tool.typos.default.extend-identifiers] tha = "tha" bre = "bre" diff --git a/subliminal/cli.py b/subliminal/cli.py index a92eec43..674252e0 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -40,7 +40,6 @@ ) from subliminal.core import ARCHIVE_EXTENSIONS, scan_name, search_external_subtitles from subliminal.extensions import get_default_providers, get_default_refiners -from subliminal.score import match_hearing_impaired from subliminal.utils import merge_extend_and_ignore_unions if TYPE_CHECKING: @@ -142,6 +141,12 @@ def configure(ctx: click.Context, param: click.Parameter | None, filename: str | # make download options download_dict = toml_dict.setdefault('download', {}) + # handle language types + for lt in ('hearing_impaired', 'foreign_only'): + # if an option was defined in the config file, make it a tuple, the expected type + if lt in download_dict and (isinstance(download_dict[lt], bool) or download_dict[lt] is None): + download_dict[lt] = (download_dict[lt],) + # remove the provider and refiner 
lists to select, extend and ignore provider_lists = { 'select': download_dict.pop('provider', []), @@ -411,7 +416,42 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None: ), ) @click.option('-f', '--force', is_flag=True, default=False, help='Force download even if a subtitle already exist.') -@click.option('-hi', '--hearing-impaired', is_flag=True, default=False, help='Prefer hearing impaired subtitles.') +@click.option( + '-fo', + '--foreign-only', + 'foreign_only', + is_flag=True, + flag_value=True, + multiple=True, + help='Prefer foreign-only subtitles.', +) +@click.option( + '-FO', + '--no-foreign-only', + 'foreign_only', + is_flag=True, + flag_value=False, + multiple=True, + help='Disfavor foreign-only subtitles.', +) +@click.option( + '-hi', + '--hearing-impaired', + 'hearing_impaired', + is_flag=True, + flag_value=True, + multiple=True, + help='Prefer hearing-impaired subtitles.', +) +@click.option( + '-HI', + '--no-hearing-impaired', + 'hearing_impaired', + is_flag=True, + flag_value=False, + multiple=True, + help='Disfavor hearing-impaired subtitles.', +) @click.option( '-m', '--min-score', @@ -423,7 +463,7 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None: '--language-type-suffix', is_flag=True, default=False, - help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign part subtitle.', + help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign only subtitle.', ) @click.option( '--language-format', @@ -468,7 +508,8 @@ def download( original_encoding: bool, single: bool, force: bool, - hearing_impaired: bool, + hearing_impaired: tuple[bool | None, ...], + foreign_only: tuple[bool | None, ...], min_score: int, language_type_suffix: bool, language_format: str, @@ -496,6 +537,14 @@ def download( elif encoding is None: encoding = 'utf-8' + # language_type + hearing_impaired_flag: bool | None = None + if len(hearing_impaired) > 0: + hearing_impaired_flag = 
hearing_impaired[-1] + foreign_only_flag: bool | None = None + if len(foreign_only) > 0: + foreign_only_flag = foreign_only[-1] + debug = obj.get('debug', False) if debug: verbose = 3 @@ -649,7 +698,8 @@ def download( v, language_set, min_score=scores['hash'] * min_score // 100, - hearing_impaired=hearing_impaired, + hearing_impaired=hearing_impaired_flag, + foreign_only=foreign_only_flag, only_one=single, ignore_subtitles=ignore_subtitles, ) @@ -701,11 +751,8 @@ def download( else: score_color = 'green' - # scale score from 0 to 100 taking out preferences - scaled_score = score - if match_hearing_impaired(s, hearing_impaired=hearing_impaired): - scaled_score -= scores['hearing_impaired'] - scaled_score *= 100 / scores['hash'] + # scale score from 0 to 100 + scaled_score = score * 100 / scores['hash'] # echo some nice colored output language_str = ( diff --git a/subliminal/core.py b/subliminal/core.py index 24ab6b08..c464b9cc 100644 --- a/subliminal/core.py +++ b/subliminal/core.py @@ -24,7 +24,7 @@ refiner_manager, ) from .score import compute_score as default_compute_score -from .subtitle import SUBTITLE_EXTENSIONS, Subtitle +from .subtitle import SUBTITLE_EXTENSIONS, LanguageType, Subtitle from .utils import get_age, handle_exception from .video import VIDEO_EXTENSIONS, Episode, Movie, Video @@ -148,7 +148,7 @@ def list_subtitles_provider(self, provider: str, video: Video, languages: Set[La try: return self[provider].list_subtitles(video, provider_languages) except Exception as e: # noqa: BLE001 - handle_exception(e, 'Provider {provider}') + handle_exception(e, f'Provider {provider}') return [] @@ -220,7 +220,8 @@ def download_best_subtitles( languages: Set[Language], *, min_score: int = 0, - hearing_impaired: bool = False, + hearing_impaired: bool | None = None, + foreign_only: bool | None = None, only_one: bool = False, compute_score: ComputeScore | None = None, ignore_subtitles: Sequence[str] | None = None, @@ -234,10 +235,11 @@ def download_best_subtitles( 
:param languages: languages to download. :type languages: set of :class:`~babelfish.language.Language` :param int min_score: minimum score for a subtitle to be downloaded. - :param bool hearing_impaired: hearing impaired preference. + :param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent). + :param (bool | None) foreign_only: foreign only preference (yes/no/indifferent). :param bool only_one: download only one subtitle, not one per language. :param compute_score: function that takes `subtitle` and `video` as positional arguments, - `hearing_impaired` as keyword argument and returns the score. + and returns the score. :param ignore_subtitles: list of subtitle ids to ignore (None defaults to an empty list). :return: downloaded subtitles. :rtype: list of :class:`~subliminal.subtitle.Subtitle` @@ -249,9 +251,19 @@ def download_best_subtitles( # ignore subtitles subtitles = [s for s in subtitles if s.id not in ignore_subtitles] + # sort by hearing impaired and foreign only + language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, foreign_only=foreign_only) + if language_type != LanguageType.UNKNOWN: + logger.info('Sort subtitles by %s types first', language_type.value) + subtitles = sorted( + subtitles, + key=lambda s: s.language_type == language_type, + reverse=True, + ) + # sort subtitles by score scored_subtitles = sorted( - [(s, compute_score(s, video, hearing_impaired=hearing_impaired)) for s in subtitles], + [(s, compute_score(s, video)) for s in subtitles], key=operator.itemgetter(1), reverse=True, ) @@ -411,7 +423,7 @@ def parse_subtitle_filename(subtitle_filename: str, video_filename: str) -> Subt except (ValueError, LanguageReverseError): logger.exception('Cannot parse language code %r', language_code) - # TODO: extract the hearing_impaired or forced attribute + # TODO: extract the hearing_impaired or foreign_only attribute return Subtitle(language, subtitle_id=subtitle_filename) @@ -775,7 +787,8 @@ def 
download_best_subtitles( languages: Set[Language], *, min_score: int = 0, - hearing_impaired: bool = False, + hearing_impaired: bool | None = None, + foreign_only: bool | None = None, only_one: bool = False, compute_score: ComputeScore | None = None, pool_class: type[ProviderPool] = ProviderPool, @@ -790,7 +803,8 @@ def download_best_subtitles( :param languages: languages to download. :type languages: set of :class:`~babelfish.language.Language` :param int min_score: minimum score for a subtitle to be downloaded. - :param bool hearing_impaired: hearing impaired preference. + :param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent). + :param (bool | None) foreign_only: foreign only preference (yes/no/indifferent). :param bool only_one: download only one subtitle, not one per language. :param compute_score: function that takes `subtitle` and `video` as positional arguments, - `hearing_impaired` as keyword argument and returns the score. + and returns the score. @@ -825,6 +839,7 @@ def download_best_subtitles( languages, min_score=min_score, hearing_impaired=hearing_impaired, + foreign_only=foreign_only, only_one=only_one, compute_score=compute_score, ) @@ -861,7 +876,7 @@ def save_subtitles( :param str directory: path to directory where to save the subtitles, default is next to the video. :param str encoding: encoding in which to save the subtitles, default is to keep original encoding. :param (str | None) extension: the subtitle extension, default is to match to the subtitle format. - :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. Default to False. :param str language_format: format of the language suffix. Default to 'alpha2'. 
:return: the saved subtitles :rtype: list of :class:`~subliminal.subtitle.Subtitle` diff --git a/subliminal/providers/opensubtitlescom.py b/subliminal/providers/opensubtitlescom.py index 493ef411..d0e112ea 100644 --- a/subliminal/providers/opensubtitlescom.py +++ b/subliminal/providers/opensubtitlescom.py @@ -179,6 +179,7 @@ def __init__( subtitle_id: str, *, hearing_impaired: bool = False, + foreign_only: bool = False, movie_kind: str | None = None, release: str | None = None, movie_title: str | None = None, @@ -199,7 +200,14 @@ def __init__( file_id: int = 0, file_name: str = '', ) -> None: - super().__init__(language, subtitle_id, hearing_impaired=hearing_impaired, page_link=None, encoding='utf-8') + super().__init__( + language, + subtitle_id, + hearing_impaired=hearing_impaired, + foreign_only=foreign_only, + page_link=None, + encoding='utf-8', + ) self.movie_kind = movie_kind self.release = release self.movie_title = movie_title @@ -235,6 +243,7 @@ def from_response( attributes = response.get('attributes', {}) language = Language.fromopensubtitlescom(str(attributes.get('language'))) hearing_impaired = bool(int(attributes.get('hearing_impaired'))) + foreign_only = bool(int(attributes.get('foreign_parts_only'))) release = str(attributes.get('release')) moviehash_match = bool(attributes.get('moviehash_match', False)) download_count = int(attributes.get('download_count')) @@ -266,6 +275,7 @@ def from_response( language, subtitle_id, hearing_impaired=hearing_impaired, + foreign_only=foreign_only, movie_kind=movie_kind, release=release, movie_title=movie_title, diff --git a/subliminal/score.py b/subliminal/score.py index b9c758c9..c691e4c7 100644 --- a/subliminal/score.py +++ b/subliminal/score.py @@ -49,7 +49,7 @@ class ComputeScore(Protocol): """Compute the score of a subtitle matching a video.""" - def __call__(self, subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None) -> int: ... 
# noqa: D102 + def __call__(self, subtitle: Subtitle, video: Video) -> int: ... # noqa: D102 # Check if sympy is installed (for tests) @@ -141,8 +141,8 @@ def match_hearing_impaired(subtitle: Subtitle, *, hearing_impaired: bool | None ) -def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None = None) -> int: - """Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference. +def compute_score(subtitle: Subtitle, video: Video, **kwargs: Any) -> int: + """Compute the score of the `subtitle` against the `video`. :func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing. @@ -151,12 +151,11 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool | :type subtitle: :class:`~subliminal.subtitle.Subtitle` :param video: the video to compute the score against. :type video: :class:`~subliminal.video.Video` - :param (bool | None) hearing_impaired: hearing impaired preference (None if no preference). :return: score of the subtitle. 
:rtype: int """ - logger.info('Computing score of %r for video %r with %r', subtitle, video, {'hearing_impaired': hearing_impaired}) + logger.info('Computing score of %r for video %r', subtitle, video) # get the scores dict scores = get_scores(video) @@ -193,17 +192,12 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool | logger.debug('Adding imdb_id match equivalents') matches |= {'title', 'year', 'country'} - # handle hearing impaired - if match_hearing_impaired(subtitle, hearing_impaired=hearing_impaired): - logger.debug('Matched hearing_impaired') - matches.add('hearing_impaired') - # compute the score score = int(sum(scores.get(match, 0) for match in matches)) logger.info('Computed score %r with final matches %r', score, matches) # ensure score is within valid bounds - max_score = scores['hash'] + scores['hearing_impaired'] + max_score = scores['hash'] if not (0 <= score <= max_score): # pragma: no cover logger.info('Clip score between 0 and %d: %d', max_score, score) score = int(clip(score, 0, max_score)) diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index 83c600c4..d29bee3c 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -49,21 +49,22 @@ class LanguageType(Enum): """Subtitle language types.""" UNKNOWN = 'unknown' - FORCED = 'forced' + FOREIGN_ONLY = 'foreign_only' NORMAL = 'normal' HEARING_IMPAIRED = 'hearing_impaired' @classmethod - def from_flags(cls, *, hearing_impaired: bool | None = None, forced: bool | None = None) -> LanguageType: + def from_flags(cls, *, hearing_impaired: bool | None = None, foreign_only: bool | None = None) -> LanguageType: """Convert to LanguageType from flags.""" language_type = cls.UNKNOWN + # hearing_impaired takes precedence over foreign_only if both are True if hearing_impaired: language_type = cls.HEARING_IMPAIRED - elif forced: - language_type = cls.FORCED - # if hearing_impaired or forced is specified to be False + elif foreign_only: + language_type = 
cls.FOREIGN_ONLY + # if hearing_impaired or foreign_only is specified to be False # then for sure the subtitle is normal. - elif hearing_impaired is False or forced is False: + elif hearing_impaired is False or foreign_only is False: language_type = cls.NORMAL return language_type @@ -76,9 +77,9 @@ def is_hearing_impaired(self) -> bool | None: return None return False - def is_forced(self) -> bool | None: - """Flag for forced.""" - if self == LanguageType.FORCED: + def is_foreign_only(self) -> bool | None: + """Flag for foreign only.""" + if self == LanguageType.FOREIGN_ONLY: return True if self == LanguageType.UNKNOWN: return None @@ -91,6 +92,7 @@ class Subtitle: :param language: language of the subtitle. :type language: :class:`~babelfish.language.Language` :param (bool | None) hearing_impaired: whether or not the subtitle is hearing impaired (None if unknown). + :param (bool | None) foreign_only: whether or not the subtitle is foreign only / forced (None if unknown). :param page_link: URL of the web page from which the subtitle can be downloaded. :type page_link: str :param encoding: Text encoding of the subtitle. 
@@ -146,7 +148,7 @@ def __init__( subtitle_id: str = '', *, hearing_impaired: bool | None = None, - forced: bool | None = None, + foreign_only: bool | None = None, page_link: str | None = None, encoding: str | None = None, subtitle_format: str | None = None, @@ -167,7 +169,7 @@ def __init__( self.fps = fps self.embedded = embedded - self.language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, forced=forced) + self.language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, foreign_only=foreign_only) self.encoding = None # validate the encoding if encoding: @@ -192,9 +194,9 @@ def hearing_impaired(self) -> bool | None: return self.language_type.is_hearing_impaired() @property - def forced(self) -> bool | None: - """Whether the subtitle is a forced subtitle.""" - return self.language_type.is_forced() + def foreign_only(self) -> bool | None: + """Whether the subtitle is a foreign only / forced subtitle.""" + return self.language_type.is_foreign_only() @property def content(self) -> bytes | None: @@ -379,7 +381,7 @@ def get_path( :type video: :class:`~subliminal.video.Video` :param bool single: save a single subtitle, default is to save one subtitle per language. :param (str | None) extension: the subtitle extension, default is to match to the subtitle format. - :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. Default to False. :param str language_format: format of the language suffix. Default to 'alpha2'. :return: path of the subtitle. 
:rtype: str @@ -426,7 +428,7 @@ def __init__( language: Language, *, hearing_impaired: bool | None = None, - forced: bool | None = None, + foreign_only: bool | None = None, encoding: str | None = None, subtitle_format: str | None = None, ) -> None: @@ -436,7 +438,7 @@ def __init__( language, subtitle_id, hearing_impaired=hearing_impaired, - forced=forced, + foreign_only=foreign_only, encoding=encoding, subtitle_format=subtitle_format, embedded=True, @@ -448,8 +450,8 @@ def info(self) -> str: extra = '' if self.language_type == LanguageType.HEARING_IMPAIRED: extra = ' [hi]' - elif self.language_type == LanguageType.FORCED: - extra = ' [forced]' + elif self.language_type == LanguageType.FOREIGN_ONLY: + extra = ' [fo]' return f'{self.id}{extra}' @@ -491,9 +493,12 @@ def get_subtitle_suffix( :param language: language of the subtitle to put in the path. :type language: :class:`~babelfish.language.Language` - :param str language_format: format of the language suffix. Default to 'alpha2'. - :param LanguageType language_type: the language type of the subtitle (hearing impaired or forced). - :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :param str language_format: format of the language suffix. + Default to 'alpha2'. + :param LanguageType language_type: the language type of the subtitle + (hearing impaired or foreign only). + :param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. + Default to False. :return: suffix to the subtitle name. 
:rtype: str @@ -525,8 +530,8 @@ def get_subtitle_suffix( if language_type_suffix: if language_type == LanguageType.HEARING_IMPAIRED: language_type_part = '.hi' - elif language_type == LanguageType.FORCED: - language_type_part = '.forced' + elif language_type == LanguageType.FOREIGN_ONLY: + language_type_part = '.fo' return language_type_part + language_part diff --git a/tests/test_score.py b/tests/test_score.py index 5b9f4b5a..25276541 100644 --- a/tests/test_score.py +++ b/tests/test_score.py @@ -141,6 +141,4 @@ def test_compute_score_hash_hearing_impaired(movies): filename='', encoding='utf-8', ) - assert compute_score(subtitle, video, hearing_impaired=True) == ( - movie_scores['hash'] + movie_scores['hearing_impaired'] - ) + assert compute_score(subtitle, video, hearing_impaired=True) == movie_scores['hash'] diff --git a/tests/test_subtitle.py b/tests/test_subtitle.py index 1c42028e..123c63fa 100644 --- a/tests/test_subtitle.py +++ b/tests/test_subtitle.py @@ -18,26 +18,26 @@ @pytest.mark.parametrize('hearing_impaired', [None, True, False]) -@pytest.mark.parametrize('forced', [None, True, False]) -def test_languague_type(hearing_impaired: bool | None, forced: bool | None) -> None: - language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, forced=forced) +@pytest.mark.parametrize('foreign_only', [None, True, False]) +def test_languague_type(hearing_impaired: bool | None, foreign_only: bool | None) -> None: + language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, foreign_only=foreign_only) if hearing_impaired is True: assert language_type == LanguageType.HEARING_IMPAIRED assert language_type.is_hearing_impaired() is True - assert language_type.is_forced() is False - elif forced is True: - assert language_type == LanguageType.FORCED + assert language_type.is_foreign_only() is False + elif foreign_only is True: + assert language_type == LanguageType.FOREIGN_ONLY assert language_type.is_hearing_impaired() is False - assert 
language_type.is_forced() is True - elif hearing_impaired is False or forced is False: + assert language_type.is_foreign_only() is True + elif hearing_impaired is False or foreign_only is False: assert language_type == LanguageType.NORMAL assert language_type.is_hearing_impaired() is False - assert language_type.is_forced() is False + assert language_type.is_foreign_only() is False else: assert language_type == LanguageType.UNKNOWN assert language_type.is_hearing_impaired() is None - assert language_type.is_forced() is None + assert language_type.is_foreign_only() is None def test_subtitle_text() -> None: @@ -175,14 +175,14 @@ def test_get_subtitle_path_hearing_impaired(movies): assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.hi.de-CH-Latn.srt' -def test_get_subtitle_path_forced(movies): +def test_get_subtitle_path_foreign_only(movies): video = movies['man_of_steel'] suffix = get_subtitle_suffix( Language('srp', None, 'Cyrl'), - language_type=LanguageType.FORCED, + language_type=LanguageType.FOREIGN_ONLY, language_type_suffix=True, ) - assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.forced.sr-Cyrl.srt' + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.fo.sr-Cyrl.srt' def test_get_subtitle_path_alpha3(movies): @@ -243,12 +243,12 @@ def test_subtitle_invalid_encoding(): def test_subtitle_guess_encoding_utf8(): subtitle = Subtitle( language=Language('zho'), - forced=False, + foreign_only=False, page_link=None, encoding=None, ) subtitle.content = b'Something here' - assert subtitle.forced is False + assert subtitle.foreign_only is False assert subtitle.guess_encoding() == 'utf-8' assert subtitle.text == 'Something here' @@ -285,7 +285,7 @@ def test_subtitle_info(monkeypatch) -> None: subtitle = Subtitle( Language('eng'), 'xv34e', - forced=True, + foreign_only=True, ) text = '1\n00:00:20,000 --> 00:00:24,400\nIn response to your honored\n\n' 
monkeypatch.setattr(Subtitle, 'text', text) @@ -306,10 +306,10 @@ def test_embedded_subtitle_info_hearing_impaired(monkeypatch) -> None: assert isinstance(subtitle.info, str) -def test_embedded_subtitle_info_forced(monkeypatch) -> None: +def test_embedded_subtitle_info_foreign_only(monkeypatch) -> None: subtitle = EmbeddedSubtitle( Language('fra'), - forced=True, + foreign_only=True, ) text = '1\n00:00:20,000 --> 00:00:24,400\nEn réponse à votre honorée du tant\n\n' monkeypatch.setattr(Subtitle, 'text', text)