Skip to content

Commit

Permalink
add filters for valid emoji
Browse files Browse the repository at this point in the history
  • Loading branch information
wenh06 committed Nov 23, 2023
1 parent 5ad9c74 commit 2ba6193
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 6 deletions.
22 changes: 18 additions & 4 deletions sphinx_emoji_favicon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,24 @@


# constants
emoji_unicodes = set(EMOJI_DATA.keys())
_defalut_twemoji_latest_version = "14.0.2"
_str2emoji = {}
_str2emoji_lang = {lang: {} for lang in LANGUAGES}
for k, v in EMOJI_DATA.items():
if "status" in v and v["status"] > 2:
# https://carpedm20.github.io/emoji/docs/api.html#emoji-status
# 1: component, 2: fully-qualified, 3: minimally-qualified, 4: unqualified
continue
if "E" in v and str(v["E"]) > _defalut_twemoji_latest_version:
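# skip emoji whose Emoji version ("E") is newer than the default twemoji version
# NOTE(review): this is a lexicographic *string* comparison, not a numeric version
# comparison -- e.g. "2" > "14.0.2" is True, so single-digit versions such as 2..9
# are skipped as well; confirm whether a parsed (numeric) comparison is intended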
continue
# limit to unicode emoji with length <= 2
# longer emoji strings are usually composed of multiple unicode emoji
if len(k) > 2:
continue
for key, s in v.items():
if key in ["status", "E", "variant"]:
# "E" for Emoji version: https://carpedm20.github.io/emoji/docs/api.html#emoji-version
continue
if key == "alias":
for alias in s:
Expand All @@ -36,6 +48,7 @@
_str2emoji[s] = k
_str2emoji_lang[key][s] = k
emoji_strs = set(_str2emoji.keys())
emoji_unicodes = set(_str2emoji.values())


def _url_is_reachable(url: str, timeout: float = 0.8) -> bool:
Expand Down Expand Up @@ -70,7 +83,6 @@ def _get_twemoji_latest_version() -> str:
The latest twemoji version.
"""
defalut_latest_version = "14.0.2"
url = "https://unpkg.com/twemoji@latest/dist/twemoji.min.js"
try:
r = requests.get(url, timeout=3)
Expand All @@ -80,9 +92,9 @@ def _get_twemoji_latest_version() -> str:
# e.g. https://unpkg.com/[email protected]/dist/twemoji.min.js
return re.search("twemoji@([\\w\\.\\-]+)", r.url).group(1)
else:
return defalut_latest_version
return _defalut_twemoji_latest_version
except Exception:
return defalut_latest_version
return _defalut_twemoji_latest_version


def _get_twemoji_config(twemoji_assets_type: str = "72x72", twemoji_cdn: Optional[str] = None, **kwargs: Any) -> Dict[str, Any]:
Expand Down Expand Up @@ -155,6 +167,8 @@ def _to_code_point(unicode_surrogates: str, sep: str = "-") -> str:
https://github.com/streamlit/streamlit/blob/develop/frontend/lib/src/vendor/twemoji.ts#L7-L24
https://unpkg.com/[email protected]/dist/twemoji.min.js
Parameters
----------
unicode_surrogates : str
Expand Down
25 changes: 23 additions & 2 deletions test/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
import random
import re

from emoji import EMOJI_DATA, LANGUAGES
from tqdm.auto import tqdm

from sphinx_emoji_favicon import _str2emoji, create_emoji_favicon_meta
from sphinx_emoji_favicon import _defalut_twemoji_latest_version, _str2emoji, _url_is_reachable, create_emoji_favicon_meta


def test_create_emoji_favicon_meta():
check_link_probability = 0.01
for emoji_unicode, emoji_data in tqdm(EMOJI_DATA.items(), total=len(EMOJI_DATA)):
if "status" in emoji_data and emoji_data["status"] > 2:
continue
if "E" in emoji_data and str(emoji_data["E"]) > _defalut_twemoji_latest_version:
continue
if len(emoji_unicode) > 2:
continue
emoji_favicon_meta = create_emoji_favicon_meta(emoji_unicode)
for lang in LANGUAGES:
if lang not in emoji_data:
continue
emoji_str = emoji_data[lang]
assert create_emoji_favicon_meta(emoji_str, lang) == emoji_favicon_meta
new_emoji_favicon_meta = create_emoji_favicon_meta(emoji_str, lang)
assert new_emoji_favicon_meta == emoji_favicon_meta
if random.random() <= check_link_probability:
link = re.search(r'href="([^"]+)"', emoji_favicon_meta).group(1)
# assert _url_is_reachable(link)
# some of the emoji from the latest versions are not yet available in Twemoji
# so we DO NOT assert that the link is reachable
if not _url_is_reachable(link):
print(emoji_unicode)
print(emoji_str)
print("status:", emoji_data["status"])
print("version:", emoji_data["E"])
for emoji_alias in emoji_data.get("alias", []):
if emoji_alias in _str2emoji:
continue
Expand Down

0 comments on commit 2ba6193

Please sign in to comment.