Skip to content

Commit

Permalink
improve extract_urls function, and rename the function (UsergeTeam#462)
Browse files Browse the repository at this point in the history
  • Loading branch information
SpEcHiDe authored and pull[bot] committed Sep 26, 2023
1 parent cd01f67 commit 6079754
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 8 deletions.
4 changes: 2 additions & 2 deletions userge/plugins/misc/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from pySmartDL import SmartDL

from userge import userge, Message, Config
from userge.utils import progress, humanbytes, extract_urls
from userge.utils import progress, humanbytes, extract_entities
from userge.utils.exceptions import ProcessCanceled

LOGGER = userge.getLogger(__name__)
Expand Down Expand Up @@ -151,7 +151,7 @@ async def tg_download(
""" download from tg file """
if not to_download.media:
dl_loc, mite = [], 0
ets = extract_urls(to_download)
ets = extract_entities(to_download, ["url", "text_link"])
if len(ets) == 0:
raise Exception("nothing found to download")
for uarl in ets:
Expand Down
2 changes: 1 addition & 1 deletion userge/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
take_screen_shot,
parse_buttons,
is_command,
extract_urls)
extract_entities)
34 changes: 29 additions & 5 deletions userge/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,18 +222,42 @@ def is_command(cmd: str) -> bool:
return is_cmd


# Entity types whose value is simply the part of the message text they cover.
_TEXT_SPAN_ENTITY_TYPES = frozenset({
    "url",
    "mention",
    "hashtag",
    "cashtag",
    "bot_command",
    "email",
    "phone_number",
    "bold",
    "italic",
    "underline",
    "strikethrough",
    "spoiler",
    "code",
    "pre",
})


def extract_entities(message: Message, typeofentity: List[str]) -> List[str]:
    """ extract the values of the requested entity types from a message

    Parameters:
        message: message to scan. ``message.entities`` is used, falling back
            to ``message.caption_entities``; the text is ``message.text``,
            falling back to ``message.caption``.
        typeofentity: entity type names to keep, e.g. ``["url", "text_link"]``.

    Returns:
        One value per matching entity, in entity order:
        the covered text for text-span types (url, mention, hashtag, ...),
        ``entity.url`` for ``"text_link"`` and ``entity.user`` for
        ``"text_mention"`` (so that item is whatever object ``entity.user``
        holds, not a string). Entities with an empty value are skipped.
    """
    values = []
    entities = message.entities or message.caption_entities or []
    text = message.text or message.caption or ""
    # Telegram expresses entity offset/length in UTF-16 code units, so slice
    # the UTF-16 encoding (2 bytes per unit) rather than the str itself;
    # otherwise any emoji before an entity shifts the extracted text.
    # "surrogatepass" keeps the round trip lossless for unusual input.
    utf16_text = text.encode("utf-16-le", "surrogatepass")
    for entity in entities:
        entity_type = entity.type
        if entity_type not in typeofentity:
            continue

        value = None
        if entity_type in _TEXT_SPAN_ENTITY_TYPES:
            start = 2 * entity.offset
            end = start + 2 * entity.length
            value = utf16_text[start:end].decode("utf-16-le", "surrogatepass")
        elif entity_type == "text_link":
            value = entity.url
        elif entity_type == "text_mention":
            value = entity.user

        if value:
            values.append(value)
    return values

0 comments on commit 6079754

Please sign in to comment.