From 19dde91f2df3b517f83e7406b523a74618371d13 Mon Sep 17 00:00:00 2001 From: Shrimadhav U K Date: Sat, 5 Feb 2022 23:31:09 +0530 Subject: [PATCH] improve extract_urls function, and rename the function (#462) --- userge/plugins/misc/download.py | 4 ++-- userge/utils/__init__.py | 2 +- userge/utils/tools.py | 34 ++++++++++++++++++++++++++++----- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/userge/plugins/misc/download.py b/userge/plugins/misc/download.py index b2136f841..df58dd193 100644 --- a/userge/plugins/misc/download.py +++ b/userge/plugins/misc/download.py @@ -20,7 +20,7 @@ from pySmartDL import SmartDL from userge import userge, Message, Config -from userge.utils import progress, humanbytes, extract_urls +from userge.utils import progress, humanbytes, extract_entities from userge.utils.exceptions import ProcessCanceled LOGGER = userge.getLogger(__name__) @@ -151,7 +151,7 @@ async def tg_download( """ download from tg file """ if not to_download.media: dl_loc, mite = [], 0 - ets = extract_urls(to_download) + ets = extract_entities(to_download, ["url", "text_link"]) if len(ets) == 0: raise Exception("nothing found to download") for uarl in ets: diff --git a/userge/utils/__init__.py b/userge/utils/__init__.py index fe4051a21..d389fa6e8 100644 --- a/userge/utils/__init__.py +++ b/userge/utils/__init__.py @@ -22,4 +22,4 @@ take_screen_shot, parse_buttons, is_command, - extract_urls) + extract_entities) diff --git a/userge/utils/tools.py b/userge/utils/tools.py index 61b0d0b34..bf932e290 100644 --- a/userge/utils/tools.py +++ b/userge/utils/tools.py @@ -222,18 +222,42 @@ def is_command(cmd: str) -> bool: return is_cmd -def extract_urls(message: Message) -> List[str]: +def extract_entities(message: Message, typeofentity: List[str]) -> List[str]: + """ gets a message and returns a list of entity_type in the message + """ tero = [] entities = message.entities or message.caption_entities or [] text = message.text or message.caption or "" for entity in entities: url = None - if entity.type == "text_link": - url = entity.url - elif entity.type == "url": + cet = entity.type + if entity.type in [ + "url", + "mention", + "hashtag", + "cashtag", + "bot_command", + "url", + "email", + "phone_number", + "bold", + "italic", + "underline", + "strikethrough", + "spoiler", + "code", + "pre", + ]: offset = entity.offset length = entity.length url = text[offset:offset + length] - if url: + + elif entity.type == "text_link": + url = entity.url + + elif entity.type == "text_mention": + url = entity.user + + if url and cet in typeofentity: tero.append(url) return tero