Skip to content

Commit

Permalink
fix(pull): added size limit parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
darkdragn committed Oct 2, 2024
1 parent 1237e64 commit 216559f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 9 deletions.
12 changes: 10 additions & 2 deletions party/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@
"exclusive with post_id, post_title and file_format"
)

# CLI option for the per-file download size limit, expressed in megabytes.
# The example uses "MB" (megabytes), not "Mb" (megabits): the download
# handler in this change derives the size from the Content-Length byte count
# divided by 1024 twice, so the unit shown to users must be bytes-based.
size_limit_option = typer.Option(
    help="Allows for a size limit, in Megabytes, as a cut off for downloaded "
    "files. Example: if 50, no files larger than 50MB will be downloaded."
)
file_format_option = typer.Option(
help="Used to set the output file format. "
"Mutually exclusive with post_id, post_title and ordered short. "
Expand All @@ -117,6 +121,7 @@ def pull_user(
post_title: Annotated[bool, post_title_option] = False,
ordered_short: Annotated[bool, ordered_short_option] = False,
file_format: Annotated[str, file_format_option] = "{ref.filename}",
size_limit: Annotated[int, size_limit_option] = -1,
sluglify: bool = False,
full_check: bool = False,
):
Expand Down Expand Up @@ -160,6 +165,7 @@ def pull_user(
ordered_short=ordered_short,
file_format=file_format,
sluglify=sluglify,
size_limit=size_limit,
)

update_csluglify(sluglify)
Expand Down Expand Up @@ -209,7 +215,8 @@ def pull_user(
typer.secho(f"Downloading from user: {user.name}", fg=typer.colors.MAGENTA)
with tqdm(total=len(files)) as pbar:
output = asyncio.run(
download_async(pbar, site, directory, files, workers, full_check)
download_async(pbar, site, directory, files, workers, full_check,
size_limit)
)
write_etags(directory)
count = Counter(output)
Expand All @@ -223,6 +230,7 @@ async def download_async(
files,
workers: int = 10,
full_check: bool = False,
size_limit: int = -1,
):
"""Basic AsyncIO implementation of downloads for files"""
timeout = aiohttp.ClientTimeout(60 * 60, sock_connect=30)
Expand Down Expand Up @@ -250,7 +258,7 @@ async def download(file, semaphore):
filename = f"{directory}/{file.filename}"
async with semaphore:
status = await file.download(
session, filename, 0, full_check
session, filename, 0, full_check, size_limit
)
if status == StatusEnum.ERROR_429 and workers > 1:
workers = workers - 1
Expand Down
1 change: 1 addition & 0 deletions party/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class StatusEnum(Enum):
EXISTS = 5
ERROR_OSERROR = 6
DUPLICATE = 7
TOO_LARGE = 8


def generate_token(size=16):
Expand Down
33 changes: 26 additions & 7 deletions party/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
import aiohttp
import desert

from aiohttp import ClientPayloadError, ServerTimeoutError, ClientConnectorError
from aiohttp import (
ClientPayloadError,
ServerTimeoutError,
ClientConnectorError,
)
from dateutil.parser import parse
from loguru import logger
from tqdm import tqdm
Expand Down Expand Up @@ -122,6 +126,7 @@ async def download(
filename: str = ".",
retries: int = 0,
full_check: bool = False,
cut_off: int = -1,
):
"""Async download handler"""
status = StatusEnum.SUCCESS
Expand All @@ -138,6 +143,8 @@ async def download(
}
try:
async with session.head(url, allow_redirects=True) as head:
size_in_mb = (int(head.headers["content-length"])/1024/1024) \
if 'content-length' in head.headers else 1
if head.status == 429:
return StatusEnum.ERROR_429
try:
Expand All @@ -149,6 +156,12 @@ async def download(
return StatusEnum.ERROR_OTHER
if etag_exists(tag) and not os.path.exists(filename):
return StatusEnum.DUPLICATE
if (
cut_off > 0
and "content-length" in head.headers
and cut_off < size_in_mb
):
return StatusEnum.TOO_LARGE
add_etag(tag)

async with session.get(url, headers=headers) as resp:
Expand Down Expand Up @@ -178,7 +191,11 @@ async def download(
)
fbar.refresh()
fbar.close()
except (ClientPayloadError, ServerTimeoutError, ClientConnectorError) as err:
except (
ClientPayloadError,
ServerTimeoutError,
ClientConnectorError,
) as err:
logger.debug(
{
"error": err,
Expand Down Expand Up @@ -226,17 +243,19 @@ async def download(
{"error": err, "filename": filename, "url": self.path}
)
status = StatusEnum.ERROR_OTHER
except (ConnectTimeoutError, ServerTimeoutError, ClientConnectorError) as err:
except (
ConnectTimeoutError,
ServerTimeoutError,
ClientConnectorError,
) as err:
logger.debug(
{"error": err, "filename": filename, "url": self.path}
)
if retries < 2:
status = await self.download(
session, filename, retries + 1
)
status = await self.download(session, filename, retries + 1)
else:
status = StatusEnum.ERROR_TIMEOUT
if 'tag' in locals():
if "tag" in locals():
remove_etag(tag)
return status

Expand Down

0 comments on commit 216559f

Please sign in to comment.