diff --git a/sphinx_emoji_favicon/__init__.py b/sphinx_emoji_favicon/__init__.py index 11105c1..c1b8d10 100644 --- a/sphinx_emoji_favicon/__init__.py +++ b/sphinx_emoji_favicon/__init__.py @@ -18,12 +18,24 @@ # constants -emoji_unicodes = set(EMOJI_DATA.keys()) +_defalut_twemoji_latest_version = "14.0.2" _str2emoji = {} _str2emoji_lang = {lang: {} for lang in LANGUAGES} for k, v in EMOJI_DATA.items(): + if "status" in v and v["status"] > 2: + # https://carpedm20.github.io/emoji/docs/api.html#emoji-status + # 1: component, 2: fully-qualified, 3: minimally-qualified, 4: unqualified + continue + if "E" in v and tuple(map(int, str(v["E"]).split("."))) > tuple(map(int, _defalut_twemoji_latest_version.split("."))): + # skip emoji newer than the twemoji version; compare numeric parts, because as strings "2" > "14.0.2" + continue + # limit to unicode emoji with length <= 2 + # longer emoji strings are usually composed of multiple unicode emoji + if len(k) > 2: + continue for key, s in v.items(): if key in ["status", "E", "variant"]: + # "E" for Emoji version: https://carpedm20.github.io/emoji/docs/api.html#emoji-version continue if key == "alias": for alias in s: @@ -36,6 +48,7 @@ _str2emoji[s] = k _str2emoji_lang[key][s] = k emoji_strs = set(_str2emoji.keys()) +emoji_unicodes = set(_str2emoji.values()) def _url_is_reachable(url: str, timeout: float = 0.8) -> bool: @@ -70,7 +83,6 @@ def _get_twemoji_latest_version() -> str: The latest twemoji version. """ - defalut_latest_version = "14.0.2" url = "https://unpkg.com/twemoji@latest/dist/twemoji.min.js" try: r = requests.get(url, timeout=3) @@ -80,9 +92,9 @@ def _get_twemoji_latest_version() -> str: # e.g. 
https://unpkg.com/twemoji@14.0.2/dist/twemoji.min.js return re.search("twemoji@([\\w\\.\\-]+)", r.url).group(1) else: - return defalut_latest_version + return _defalut_twemoji_latest_version except Exception: - return defalut_latest_version + return _defalut_twemoji_latest_version def _get_twemoji_config(twemoji_assets_type: str = "72x72", twemoji_cdn: Optional[str] = None, **kwargs: Any) -> Dict[str, Any]: @@ -155,6 +167,8 @@ def _to_code_point(unicode_surrogates: str, sep: str = "-") -> str: https://github.com/streamlit/streamlit/blob/develop/frontend/lib/src/vendor/twemoji.ts#L7-L24 + https://unpkg.com/twemoji@14.0.2/dist/twemoji.min.js + Parameters ---------- unicode_surrogates : str diff --git a/test/test_parser.py b/test/test_parser.py index 76534fe..4298c54 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -1,17 +1,38 @@ +import random +import re + from emoji import EMOJI_DATA, LANGUAGES from tqdm.auto import tqdm -from sphinx_emoji_favicon import _str2emoji, create_emoji_favicon_meta +from sphinx_emoji_favicon import _defalut_twemoji_latest_version, _str2emoji, _url_is_reachable, create_emoji_favicon_meta def test_create_emoji_favicon_meta(): + check_link_probability = 0.01 for emoji_unicode, emoji_data in tqdm(EMOJI_DATA.items(), total=len(EMOJI_DATA)): + if "status" in emoji_data and emoji_data["status"] > 2: + continue + if "E" in emoji_data and tuple(map(int, str(emoji_data["E"]).split("."))) > tuple(map(int, _defalut_twemoji_latest_version.split("."))): + continue + if len(emoji_unicode) > 2: + continue emoji_favicon_meta = create_emoji_favicon_meta(emoji_unicode) for lang in LANGUAGES: if lang not in emoji_data: continue emoji_str = emoji_data[lang] - assert create_emoji_favicon_meta(emoji_str, lang) == emoji_favicon_meta + new_emoji_favicon_meta = create_emoji_favicon_meta(emoji_str, lang) + assert new_emoji_favicon_meta == emoji_favicon_meta + if random.random() <= check_link_probability: + link = re.search(r'href="([^"]+)"', emoji_favicon_meta).group(1) + # assert _url_is_reachable(link) + # 
some of the emoji from the latest versions are not yet available in Twemoji + # so we DO NOT assert that the link is reachable + if not _url_is_reachable(link): + print(emoji_unicode) + print(emoji_str) + print("status:", emoji_data["status"]) + print("version:", emoji_data["E"]) for emoji_alias in emoji_data.get("alias", []): if emoji_alias in _str2emoji: continue