From 8fb7f48ebbf44043f48d8029fdebca27ab8dfdba Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 20 Jan 2024 18:04:54 +0100 Subject: [PATCH 01/13] Code improvements/refactorings before trying a switch. --- download/m3u8.py | 226 ++++++++++++++++++++++++++++++++-------- download/media.py | 6 +- fansly_downloader_ng.py | 7 +- requirements.txt | 2 + utils/web.py | 89 +++++++++++++++- 5 files changed, 277 insertions(+), 53 deletions(-) diff --git a/download/m3u8.py b/download/m3u8.py index 28913fb..7d4d706 100644 --- a/download/m3u8.py +++ b/download/m3u8.py @@ -1,105 +1,146 @@ -"""Handles M3U8 Media""" +"""M3U8 Media Download Handling""" import av import concurrent.futures import io -import m3u8 from av.audio.stream import AudioStream from av.video.stream import VideoStream +from memory_profiler import profile +from pyffmpeg import FFmpeg from m3u8 import M3U8 from pathlib import Path from rich.table import Column from rich.progress import BarColumn, TextColumn, Progress -from typing import Optional +from typing import Optional, Any from config.fanslyconfig import FanslyConfig from errors import M3U8Error +from utils.web import get_file_name_from_url, get_qs_value, split_url from textio import print_error +def get_m3u8_cookies(m3u8_url: str) -> dict[str, Any]: + """Parses an M3U8 URL and returns CloudFront cookies. + """ + # Parse URL query string for required cookie values + policy = get_qs_value(m3u8_url, 'Policy') + key_pair_id = get_qs_value(m3u8_url, 'Key-Pair-Id') + signature = get_qs_value(m3u8_url, 'Signature') + + cookies = { + 'CloudFront-Key-Pair-Id': key_pair_id, + 'CloudFront-Policy': policy, + 'CloudFront-Signature': signature, + } + + return cookies + + +def get_m3u8_progress(disable_loading_bar: bool) -> Progress: + """Returns a Rich progress bar customized for M3U8 Downloads. 
+ """ + text_column = TextColumn('', table_column=Column(ratio=1)) + bar_column = BarColumn(bar_width=60, table_column=Column(ratio=5)) + + return Progress( + text_column, + bar_column, + expand=True, + transient=True, + disable=disable_loading_bar, + ) + + +@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) def download_m3u8(config: FanslyConfig, m3u8_url: str, save_path: Path) -> bool: """Download M3U8 content as MP4. - :param FanslyConfig config: The downloader configuration. - :param str m3u8_url: The URL string of the M3U8 to download. - :param Path save_path: The suggested file to save the video to. + :param config: The downloader configuration. + :type config: FanslyConfig + + :param m3u8_url: The URL string of the M3U8 to download. + :type m3u8_url: str + + :param save_path: The suggested file to save the video to. This will be changed to MP4 (.mp4). + :type save_path: Path :return: True if successful or False otherwise. :rtype: bool """ - # parse m3u8_url for required strings - parsed_url = {k: v for k, v in [s.split('=') for s in m3u8_url.split('?')[-1].split('&')]} - - policy = parsed_url.get('Policy') - key_pair_id = parsed_url.get('Key-Pair-Id') - signature = parsed_url.get('Signature') - # re-construct original .m3u8 base link - m3u8_url = m3u8_url.split('.m3u8')[0] + '.m3u8' - # used for constructing .ts chunk links - split_m3u8_url = m3u8_url.rsplit('/', 1)[0] - # remove file extension (.m3u8) from save_path + CHUNK_SIZE = 65_536 + + # Remove file extension (.m3u8) from save_path save_path = save_path.parent / save_path.stem - cookies = { - 'CloudFront-Key-Pair-Id': key_pair_id, - 'CloudFront-Policy': policy, - 'CloudFront-Signature': signature, - } + cookies = get_m3u8_cookies(m3u8_url) - # download the m3u8 playlist - playlist_content_req = config.http_session.get(m3u8_url, headers=config.http_headers(), cookies=cookies) + m3u8_base_url, m3u8_file_url = split_url(m3u8_url) - if playlist_content_req.status_code != 200: - 
print_error(f'Failed downloading m3u8; at playlist_content request. Response code: {playlist_content_req.status_code}\n{playlist_content_req.text}', 12) + # download the m3u8 playlist + playlist_response = config.http_session.get( + m3u8_file_url, + headers=config.http_headers(), + cookies=cookies, + ) + + if playlist_response.status_code != 200: + print_error( + f'Failed downloading m3u8; at playlist_content request. ' + f'Response code: {playlist_response.status_code}\n' + f'{playlist_response.text}', + 12 + ) return False - playlist_content = playlist_content_req.text + playlist_text = playlist_response.text # parse the m3u8 playlist content using the m3u8 library - playlist_obj: M3U8 = m3u8.loads(playlist_content) + playlist: M3U8 = M3U8(playlist_text, base_uri=m3u8_base_url) # get a list of all the .ts files in the playlist - ts_files = [segment.uri for segment in playlist_obj.segments if segment.uri.endswith('.ts')] + ts_files = [segment.absolute_uri for segment in playlist.segments if segment.uri.endswith('.ts')] # define a nested function to download a single .ts file and return the content - def download_ts(ts_file: str): - ts_url = f"{split_m3u8_url}/{ts_file}" - ts_response = config.http_session.get(ts_url, headers=config.http_headers(), cookies=cookies, stream=True) + def download_ts(ts_url: str) -> bytes: + ts_response = config.http_session.get( + ts_url, + headers=config.http_headers(), + cookies=cookies, + stream=True, + ) + buffer = io.BytesIO() - for chunk in ts_response.iter_content(chunk_size=1024): + for chunk in ts_response.iter_content(chunk_size=CHUNK_SIZE): buffer.write(chunk) ts_content = buffer.getvalue() return ts_content - # if m3u8 seems like it might be bigger in total file size; display loading bar - text_column = TextColumn(f"", table_column=Column(ratio=1)) - bar_column = BarColumn(bar_width=60, table_column=Column(ratio=5)) - - disable_loading_bar = False if len(ts_files) > 15 else True - - progress = Progress(text_column, 
bar_column, expand=True, transient=True, disable = disable_loading_bar) + # Display loading bar if there are many segments + progress = get_m3u8_progress( + disable_loading_bar=len(ts_files) < 15 + ) with progress: with concurrent.futures.ThreadPoolExecutor() as executor: - ts_contents = [ + segment_bytes_list = [ file for file in progress.track( executor.map(download_ts, ts_files), total=len(ts_files) ) ] - segment = bytearray() + all_ts_bytes = bytearray() - for ts_content in ts_contents: - segment += ts_content + for segment_bytes in segment_bytes_list: + all_ts_bytes += segment_bytes - input_container = av.open(io.BytesIO(segment), format='mpegts') + input_container = av.open(io.BytesIO(all_ts_bytes), format='mpegts') first_video_stream: Optional[VideoStream] = None first_audio_stream: Optional[AudioStream] = None @@ -116,7 +157,7 @@ def download_ts(ts_file: str): has_audio = first_audio_stream is not None if not has_video and not has_audio: - raise M3U8Error(f'Neiter audio nor video in M3U8 file: {m3u8_url}') + raise M3U8Error(f'Neither audio nor video in M3U8 file: {m3u8_file_url}') # define output container and streams output_container = av.open(f"{save_path}.mp4", 'w') # add .mp4 file extension @@ -156,3 +197,96 @@ def download_ts(ts_file: str): output_container.close() return True + + +@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) +def download_video( + config: FanslyConfig, + m3u8_url: str, + save_path: Path, + ) -> None: + + CHUNK_SIZE = 65_536 + + # TODO: Full path? 
+ video_path = save_path.parent + full_path = save_path + stream_uri = m3u8_url + + with config.http_session.get(stream_uri, headers=config.http_headers()) as stream_response: + segments_text = stream_response.text + segments_playlist = M3U8(content=segments_text) + + if segments_playlist.is_endlist != True \ + or segments_playlist.playlist_type != 'vod': + raise M3U8Error(f'Invalid video stream info for {stream_uri}') + + #region Nested function to download TS segments + def download_ts(segment_uri: str, segment_full_path: Path) -> None: + with open(segment_full_path, 'wb') as ts_file: + with config.http_session.get( + segment_uri, + headers=config.http_headers(), + stream=True + ) as segment_response: + for chunk in segment_response.iter_content(CHUNK_SIZE): + if chunk is not None: + ts_file.write(chunk) + #endregion + + segments = segments_playlist.segments + + segment_files: list[Path] = [] + segment_uris: list[str] = [] + + for segment in segments: + segment_uri = segment.absolute_uri + + segment_file_name = get_file_name_from_url(segment_uri) + + segment_full_path = video_path / segment_file_name + + segment_files.append(segment_full_path) + segment_uris.append(segment_uri) + + #download_ts(segment_uri, segment_full_path) + + # Display loading bar if there are many segments + progress = get_m3u8_progress( + disable_loading_bar=len(segment_files) < 5 + ) + + with progress: + with concurrent.futures.ThreadPoolExecutor() as executor: + _ = list( + progress.track( + executor.map(download_ts, segment_uris, segment_files), + total=len(segment_files) + ) + ) + + # Check multi-threaded downloads + for file in segment_files: + if not file.exists(): + raise M3U8Error(f'Stream segment failed to download: {file}') + + ffmpeg_list_file = video_path / '_ffmpeg_concat_.ffc' + + with open(ffmpeg_list_file, 'w', encoding='utf-8') as list_file: + list_file.write('ffconcat version 1.0\n') + list_file.writelines([f"file '{f.name}'\n" for f in segment_files]) + + ffmpeg = 
FFmpeg(enable_log=config.debug) + + ffmpeg.options( + f'-f concat -i "{ffmpeg_list_file}" -c copy "{full_path}"' + ) + + #region Clean up + + ffmpeg_list_file.unlink() + + for file in segment_files: + file.unlink() + + #endregion diff --git a/download/media.py b/download/media.py index afdf76d..4086e4b 100644 --- a/download/media.py +++ b/download/media.py @@ -159,7 +159,11 @@ def download_media(config: FanslyConfig, state: DownloadState, accessible_media: if media_item.file_extension == 'm3u8': # handle the download of a m3u8 file try: - file_downloaded = download_m3u8(config, m3u8_url=media_item.download_url, save_path=file_save_path) + file_downloaded = download_m3u8( + config, + m3u8_url=media_item.download_url, + save_path=file_save_path + ) if file_downloaded: state.pic_count += 1 if 'image' in media_item.mimetype else 0 diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index a0b1eb0..6c8eccc 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.10' -__date__ = '2024-01-05T14:55:00+01' +__version__ = '0.7.11' +__date__ = '2024-01-20T18:04:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ @@ -26,6 +26,8 @@ import base64 import traceback +from memory_profiler import profile + from config import FanslyConfig, load_config, validate_adjust_config from config.args import parse_args, map_args_to_config from config.modes import DownloadMode @@ -65,6 +67,7 @@ def print_logo() -> None: print(f"{(100 - len(__version__) - 1)//2*' '}v{__version__}\n") +@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) def main(config: FanslyConfig) -> int: """The main logic of the downloader program. 
diff --git a/requirements.txt b/requirements.txt index bcdb4e4..84b92cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,11 @@ av==11.0.0 ImageHash==4.3.1 loguru==0.7.2 m3u8==3.6.0 +memory-profiler Pillow==10.1.0 plyvel-ci==1.5.0 psutil==5.9.6 +pyffmpeg python-dateutil==2.8.2 requests==2.31.0 rich==13.7.0 diff --git a/utils/web.py b/utils/web.py index c6a695f..a87897c 100644 --- a/utils/web.py +++ b/utils/web.py @@ -6,17 +6,97 @@ import requests import traceback +from collections import namedtuple +from urllib.parse import urlparse, parse_qs from time import sleep +from typing import Any, NamedTuple -from config.fanslyconfig import FanslyConfig -from textio import print_error, print_info_highlight, print_warning +from textio import print_error, print_warning + + +def get_file_name_from_url(url: str) -> str: + """Parses an URL and returns the last part which usually is a + file name or directory/section. + + :param url: The URL to parse. + :type url: str + + :return: The last part of the path ie. everything after the + last slash excluding the query string. + :rtype: str + """ + parsed_url = urlparse(url) + + last_part = parsed_url.path.split('/')[-1] + + return last_part + + +def get_qs_value(url: str, key: str, default: Any=None) -> Any: + """Returns the value of a specific key of an URL query string. + + :param url: The URL to parse for a query string. + :type url: str + + :param key: The key in the query string (&key1=value1&key2=value2 ...) + whose value to return. + :type key: str + + :param default: The default value to return if the + key was not found. + :type default: Any + + :return: The value of `key` in the query string or `default` otherwise. 
+ :rtype: Any + """ + parsed_url = urlparse(url) + qs = parsed_url.query + parsed_qs = parse_qs(qs) + + result = parsed_qs.get(key, default) + + if result is default: + return result + + if len(result) == 0: + return None + + return result[0] + + +def split_url(url: str) -> NamedTuple: + """Splits an URL into absolue base and file name URLs + without query strings et al. + + Eg.: + https://my.server/some/path/interesting.txt?k1=v1&a2=b4 + + becomes + + ( + base_url='https://my.server/some/path', + file_url='https://my.server/some/path/interesting.txt' + ) + """ + parsed_url = urlparse(url) + + # URL without query string et al + file_url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}' + + # Base URL + base_url = file_url.rsplit('/', 1)[0] + + SplitURL = namedtuple('SplitURL', ['base_url', 'file_url']) + + return SplitURL(base_url, file_url) # mostly used to attempt to open fansly downloaders documentation def open_url(url_to_open: str) -> None: """Opens an URL in a browser window. - :param str url_to_open: The URL to open in the browser. + :param url_to_open: The URL to open in the browser. + :type url_to_open: str """ sleep(10) @@ -124,8 +204,9 @@ def guess_user_agent(user_agents: dict, based_on_browser: str, default_ua: str) def get_release_info_from_github(current_program_version: str) -> dict | None: """Fetches and parses the Fansly Downloader NG release info JSON from GitHub. - :param str current_program_version: The current program version to be + :param current_program_version: The current program version to be used in the user agent of web requests. + :type current_program_version: str :return: The release info from GitHub as dictionary or None if there where any complications eg. network error. From dd2dd1378d9e7a77ce44456c913f94e53f7c104a Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 20 Jan 2024 19:41:47 +0100 Subject: [PATCH 02/13] New M3U8 code in place and working. 
--- download/m3u8.py | 122 ++++++++++++++++++++++++++-------------- fansly_downloader_ng.py | 14 ++++- 2 files changed, 92 insertions(+), 44 deletions(-) diff --git a/download/m3u8.py b/download/m3u8.py index 7d4d706..51c51d6 100644 --- a/download/m3u8.py +++ b/download/m3u8.py @@ -54,7 +54,7 @@ def get_m3u8_progress(disable_loading_bar: bool) -> Progress: @profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) -def download_m3u8(config: FanslyConfig, m3u8_url: str, save_path: Path) -> bool: +def download_m3u8_old(config: FanslyConfig, m3u8_url: str, save_path: Path) -> bool: """Download M3U8 content as MP4. :param config: The downloader configuration. @@ -200,35 +200,70 @@ def download_ts(ts_url: str) -> bytes: @profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) -def download_video( +def download_m3u8( config: FanslyConfig, m3u8_url: str, save_path: Path, - ) -> None: + ) -> bool: + """Download M3U8 content as MP4. + + :param config: The downloader configuration. + :type config: FanslyConfig + :param m3u8_url: The URL string of the M3U8 to download. + :type m3u8_url: str + + :param save_path: The suggested file to save the video to. + This will be changed to MP4 (.mp4). + :type save_path: Path + + :return: True if successful or False otherwise. + :rtype: bool + """ CHUNK_SIZE = 65_536 - # TODO: Full path? + cookies = get_m3u8_cookies(m3u8_url) + + m3u8_base_url, m3u8_file_url = split_url(m3u8_url) + video_path = save_path.parent - full_path = save_path - stream_uri = m3u8_url + full_path = video_path / f'{save_path.stem}.mp4' + + with config.http_session.get( + m3u8_file_url, + headers=config.http_headers(), + cookies=cookies, + ) as stream_response: + + if stream_response.status_code != 200: + print_error( + f'Failed downloading M3U8 at playlist_content request. 
' + f'Response code: {stream_response.status_code}\n' + f'{stream_response.text}', + 12 + ) + return False - with config.http_session.get(stream_uri, headers=config.http_headers()) as stream_response: segments_text = stream_response.text - segments_playlist = M3U8(content=segments_text) + + segments_playlist = M3U8( + content=segments_text, + base_uri=m3u8_base_url, + ) if segments_playlist.is_endlist != True \ or segments_playlist.playlist_type != 'vod': - raise M3U8Error(f'Invalid video stream info for {stream_uri}') + raise M3U8Error(f'Invalid video stream info for {m3u8_file_url}') #region Nested function to download TS segments def download_ts(segment_uri: str, segment_full_path: Path) -> None: - with open(segment_full_path, 'wb') as ts_file: - with config.http_session.get( - segment_uri, - headers=config.http_headers(), - stream=True - ) as segment_response: + with config.http_session.get( + segment_uri, + headers=config.http_headers(), + cookies=cookies, + stream=True + ) as segment_response: + with open(segment_full_path, 'wb') as ts_file: for chunk in segment_response.iter_content(CHUNK_SIZE): if chunk is not None: ts_file.write(chunk) @@ -249,44 +284,47 @@ def download_ts(segment_uri: str, segment_full_path: Path) -> None: segment_files.append(segment_full_path) segment_uris.append(segment_uri) - #download_ts(segment_uri, segment_full_path) - # Display loading bar if there are many segments progress = get_m3u8_progress( disable_loading_bar=len(segment_files) < 5 ) - with progress: - with concurrent.futures.ThreadPoolExecutor() as executor: - _ = list( - progress.track( - executor.map(download_ts, segment_uris, segment_files), - total=len(segment_files) + ffmpeg_list_file = video_path / '_ffmpeg_concat_.ffc' + + try: + + with progress: + with concurrent.futures.ThreadPoolExecutor() as executor: + _ = list( + progress.track( + executor.map(download_ts, segment_uris, segment_files), + total=len(segment_files) + ) ) - ) - # Check multi-threaded downloads - 
for file in segment_files: - if not file.exists(): - raise M3U8Error(f'Stream segment failed to download: {file}') + # Check multi-threaded downloads + for file in segment_files: + if not file.exists(): + raise M3U8Error(f'Stream segment failed to download: {file}') - ffmpeg_list_file = video_path / '_ffmpeg_concat_.ffc' + with open(ffmpeg_list_file, 'w', encoding='utf-8') as list_file: + list_file.write('ffconcat version 1.0\n') + list_file.writelines([f"file '{f.name}'\n" for f in segment_files]) - with open(ffmpeg_list_file, 'w', encoding='utf-8') as list_file: - list_file.write('ffconcat version 1.0\n') - list_file.writelines([f"file '{f.name}'\n" for f in segment_files]) + ffmpeg = FFmpeg(enable_log=config.debug) - ffmpeg = FFmpeg(enable_log=config.debug) + ffmpeg.options( + f'-f concat -i "{ffmpeg_list_file}" -c copy "{full_path}"' + ) - ffmpeg.options( - f'-f concat -i "{ffmpeg_list_file}" -c copy "{full_path}"' - ) + finally: + #region Clean up - #region Clean up + ffmpeg_list_file.unlink(missing_ok=True) - ffmpeg_list_file.unlink() + for file in segment_files: + file.unlink(missing_ok=True) + + #endregion - for file in segment_files: - file.unlink() - - #endregion + return True diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 6c8eccc..983e888 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.11' -__date__ = '2024-01-20T18:04:00+01' +__version__ = '0.7.13' +__date__ = '2024-01-20T19:41:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ @@ -108,6 +108,16 @@ def main(config: FanslyConfig) -> int: global_download_state = GlobalState() + # M3U8 fixing interim + print() + print_warning( + "THIS IS AN EXPERIMENTAL IMPROVED VIDEO DOWNLOAD VERSION -" + f"\n{' '*19} EXISTING VIDEOS WILL BE DOWNLOADED AGAIN/DE-DUPLICATION WILL NOT WORK" + f"\n{' '*19} FOR DOWNLOADS FROM OLDER VERSIONS!!!" 
+ f"\n{' '*19} CTRL+C TO ABORT" + ) + input_enter_continue(config.interactive) + for creator_name in sorted(config.user_names): with Timer(creator_name): try: From a4d39cd17ad292146a5d3c4ddc9fca26b56d6244 Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 20 Jan 2024 20:01:48 +0100 Subject: [PATCH 03/13] Old code clean-up and package removal. --- download/m3u8.py | 156 +--------------------------------------- fansly_downloader_ng.py | 8 +-- requirements.txt | 11 ++- 3 files changed, 12 insertions(+), 163 deletions(-) diff --git a/download/m3u8.py b/download/m3u8.py index 51c51d6..e85d9af 100644 --- a/download/m3u8.py +++ b/download/m3u8.py @@ -1,19 +1,15 @@ """M3U8 Media Download Handling""" -import av import concurrent.futures -import io -from av.audio.stream import AudioStream -from av.video.stream import VideoStream -from memory_profiler import profile +#from memory_profiler import profile from pyffmpeg import FFmpeg from m3u8 import M3U8 from pathlib import Path from rich.table import Column from rich.progress import BarColumn, TextColumn, Progress -from typing import Optional, Any +from typing import Any from config.fanslyconfig import FanslyConfig from errors import M3U8Error @@ -53,153 +49,7 @@ def get_m3u8_progress(disable_loading_bar: bool) -> Progress: ) -@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) -def download_m3u8_old(config: FanslyConfig, m3u8_url: str, save_path: Path) -> bool: - """Download M3U8 content as MP4. - - :param config: The downloader configuration. - :type config: FanslyConfig - - :param m3u8_url: The URL string of the M3U8 to download. - :type m3u8_url: str - - :param save_path: The suggested file to save the video to. - This will be changed to MP4 (.mp4). - :type save_path: Path - - :return: True if successful or False otherwise. 
- :rtype: bool - """ - CHUNK_SIZE = 65_536 - - # Remove file extension (.m3u8) from save_path - save_path = save_path.parent / save_path.stem - - cookies = get_m3u8_cookies(m3u8_url) - - m3u8_base_url, m3u8_file_url = split_url(m3u8_url) - - # download the m3u8 playlist - playlist_response = config.http_session.get( - m3u8_file_url, - headers=config.http_headers(), - cookies=cookies, - ) - - if playlist_response.status_code != 200: - print_error( - f'Failed downloading m3u8; at playlist_content request. ' - f'Response code: {playlist_response.status_code}\n' - f'{playlist_response.text}', - 12 - ) - return False - - playlist_text = playlist_response.text - - # parse the m3u8 playlist content using the m3u8 library - playlist: M3U8 = M3U8(playlist_text, base_uri=m3u8_base_url) - - # get a list of all the .ts files in the playlist - ts_files = [segment.absolute_uri for segment in playlist.segments if segment.uri.endswith('.ts')] - - # define a nested function to download a single .ts file and return the content - def download_ts(ts_url: str) -> bytes: - ts_response = config.http_session.get( - ts_url, - headers=config.http_headers(), - cookies=cookies, - stream=True, - ) - - buffer = io.BytesIO() - - for chunk in ts_response.iter_content(chunk_size=CHUNK_SIZE): - buffer.write(chunk) - - ts_content = buffer.getvalue() - - return ts_content - - # Display loading bar if there are many segments - progress = get_m3u8_progress( - disable_loading_bar=len(ts_files) < 15 - ) - - with progress: - with concurrent.futures.ThreadPoolExecutor() as executor: - segment_bytes_list = [ - file for file in progress.track( - executor.map(download_ts, ts_files), - total=len(ts_files) - ) - ] - - all_ts_bytes = bytearray() - - for segment_bytes in segment_bytes_list: - all_ts_bytes += segment_bytes - - input_container = av.open(io.BytesIO(all_ts_bytes), format='mpegts') - - first_video_stream: Optional[VideoStream] = None - first_audio_stream: Optional[AudioStream] = None - - for stream in 
input_container.streams.video: - first_video_stream = stream - break - - for stream in input_container.streams.audio: - first_audio_stream = stream - break - - has_video = first_video_stream is not None - has_audio = first_audio_stream is not None - - if not has_video and not has_audio: - raise M3U8Error(f'Neither audio nor video in M3U8 file: {m3u8_file_url}') - - # define output container and streams - output_container = av.open(f"{save_path}.mp4", 'w') # add .mp4 file extension - - video_stream: Optional[VideoStream] = None - audio_stream: Optional[AudioStream] = None - - if has_video: - video_stream = output_container.add_stream(template=first_video_stream) - - if has_audio: - audio_stream = output_container.add_stream(template=first_audio_stream) - - start_pts = None - - for packet in input_container.demux(): - - if packet.dts is None: - continue - - if start_pts is None: - start_pts = packet.pts - - packet.pts -= start_pts - packet.dts -= start_pts - - if packet.stream == first_video_stream: - packet.stream = video_stream - - elif packet.stream == first_audio_stream: - packet.stream = audio_stream - - output_container.mux(packet) - - # close containers - input_container.close() - output_container.close() - - return True - - -@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) +#@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) def download_m3u8( config: FanslyConfig, m3u8_url: str, diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 983e888..a582283 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.13' -__date__ = '2024-01-20T19:41:00+01' +__version__ = '0.7.14' +__date__ = '2024-01-20T19:59:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ @@ -26,7 +26,7 @@ import base64 import traceback -from memory_profiler import profile +#from memory_profiler import 
profile from config import FanslyConfig, load_config, validate_adjust_config from config.args import parse_args, map_args_to_config @@ -67,7 +67,7 @@ def print_logo() -> None: print(f"{(100 - len(__version__) - 1)//2*' '}v{__version__}\n") -@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) +#@profile(precision=2, stream=open('memory_use.log', 'w', encoding='utf-8')) def main(config: FanslyConfig) -> int: """The main logic of the downloader program. diff --git a/requirements.txt b/requirements.txt index 84b92cc..07b4793 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,11 @@ -av==11.0.0 ImageHash==4.3.1 loguru==0.7.2 -m3u8==3.6.0 -memory-profiler -Pillow==10.1.0 +m3u8==4.0.0 +memory-profiler==0.61.0 +pillow==10.2.0 plyvel-ci==1.5.0 -psutil==5.9.6 -pyffmpeg +psutil==5.9.8 +pyffmpeg==2.4.2.18.1 python-dateutil==2.8.2 requests==2.31.0 rich==13.7.0 From 5c0a3871bb3d42d16cded4e3e18857cff9ab9b7d Mon Sep 17 00:00:00 2001 From: prof79 Date: Sun, 21 Jan 2024 20:52:27 +0100 Subject: [PATCH 04/13] I've devised cool selective MP4 hashing plus a command-line tool :) --- fileio/mp4.py | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++ mp4hash.py | 58 +++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 fileio/mp4.py create mode 100644 mp4hash.py diff --git a/fileio/mp4.py b/fileio/mp4.py new file mode 100644 index 0000000..e5f9317 --- /dev/null +++ b/fileio/mp4.py @@ -0,0 +1,108 @@ +"""MPEG-4 Binary File Manipulation""" + + +__all__ = [ + 'MP4Box', + 'hash_mp4file', + 'get_boxes', + 'hash_mp4box', +] + + +import os + +from io import BufferedReader +from pathlib import Path +from typing import Iterable, Optional, Callable + + +class MP4Box(object): + """Represents an MPEG-4 binary box/atom object. 
+ """ + def __init__(self, size_bytes: bytes, fourcc_bytes: bytes, position: int) -> None: + self.position = position + self.size = int.from_bytes(size_bytes, byteorder='big') + self.fourcc = str(fourcc_bytes, encoding='ascii') + + + def __str__(self) -> str: + return f'MP4Box ( Position: {self.position}, FourCC: {self.fourcc}, Size: {self.size} )' + + + +def get_boxes(reader: BufferedReader) -> Iterable[MP4Box]: + position = 0 + first = True + + while reader.peek(): + box = MP4Box( + size_bytes=reader.read(4), + fourcc_bytes=reader.read(4), + position=position, + ) + + if first and box.fourcc != 'ftyp': + raise RuntimeError(f'Not an MP4 file.') + + first = False + + position += box.size + + reader.seek(position, os.SEEK_SET) + + yield box + + +def hash_mp4box(algorithm, reader: BufferedReader, box: MP4Box): + """Hashes an MPEG-4 box atom. + + `algorithm` must be a `hashlib` algorithm. + """ + CHUNK_SIZE = 65536 + + reader.seek(box.position, os.SEEK_SET) + + chunks = box.size // CHUNK_SIZE + remainder = box.size - chunks*CHUNK_SIZE + + for _ in range(chunks): + algorithm.update(reader.read(CHUNK_SIZE)) + + algorithm.update(reader.read(remainder)) + + +def hash_mp4file( + algorithm, + file_name: Path, + print: Optional[Callable]=None + ) -> str: + + if not file_name.exists(): + raise RuntimeError(f'{file_name} does not exist.') + + file_size = file_name.stat().st_size + + if file_size < 8: + raise RuntimeError('File is too small.') + + if print is not None: + print(f'File: {file_name}') + print() + + with open(file_name, 'rb') as mp4file: + + boxes = get_boxes(mp4file) + + for box in boxes: + if print is not None: + print(box) + + if box.fourcc != 'moov' and box.fourcc != 'mdat': + hash_mp4box(algorithm, mp4file, box) + + if print is not None: + print() + print(f'Hash: {algorithm.hexdigest()}') + print() + + return algorithm.hexdigest() diff --git a/mp4hash.py b/mp4hash.py new file mode 100644 index 0000000..872a9e8 --- /dev/null +++ b/mp4hash.py @@ -0,0 +1,58 @@ 
+"""MPEG-4 Selective Hashing Tool""" + + +import argparse +import hashlib + +from pathlib import Path +from rich import print + +from fileio.mp4 import hash_mp4file + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description='Hashes an MPEG-4 except the "moov" and "mdat" portions using MD5.', + ) + + parser.add_argument( + dest='file', + metavar='FILE', + help='MPEG4 file', + ) + + parser.add_argument( + '-d', '--debug', + required=False, + action='store_true', + default=False, + help='Debug output', + ) + + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + file_name = Path(args.file) + + md5 = hashlib.md5() + + hash = hash_mp4file(md5, file_name, print=print if args.debug else None) + + print(f'{hash}\t*{file_name.name}') + + +if __name__ == '__main__': + try: + main() + + except KeyboardInterrupt: + pass + + except Exception as ex: + print() + print(f"Unexpected error: {ex}") + print() + print() From 176c42f8bf4e375735cda692c8ac5f7aee7b02ca Mon Sep 17 00:00:00 2001 From: prof79 Date: Mon, 22 Jan 2024 19:59:13 +0100 Subject: [PATCH 05/13] Manual use of ffmpeg (Linux fix?) & new MP4 hashing. 
--- download/m3u8.py | 27 ++++++++++++++++++++------- fansly_downloader_ng.py | 16 ++++++++-------- fileio/fnmanip.py | 29 +++++++++++++++++++++-------- utils/ffmpeg.py | 25 +++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 23 deletions(-) create mode 100644 utils/ffmpeg.py diff --git a/download/m3u8.py b/download/m3u8.py index e85d9af..6371df0 100644 --- a/download/m3u8.py +++ b/download/m3u8.py @@ -4,15 +4,16 @@ import concurrent.futures #from memory_profiler import profile -from pyffmpeg import FFmpeg from m3u8 import M3U8 from pathlib import Path from rich.table import Column from rich.progress import BarColumn, TextColumn, Progress +from subprocess import CalledProcessError from typing import Any from config.fanslyconfig import FanslyConfig from errors import M3U8Error +from utils.ffmpeg import run_ffmpeg from utils.web import get_file_name_from_url, get_qs_value, split_url from textio import print_error @@ -145,7 +146,7 @@ def download_ts(segment_uri: str, segment_full_path: Path) -> None: with progress: with concurrent.futures.ThreadPoolExecutor() as executor: - _ = list( + list( progress.track( executor.map(download_ts, segment_uris, segment_files), total=len(segment_files) @@ -161,11 +162,23 @@ def download_ts(segment_uri: str, segment_full_path: Path) -> None: list_file.write('ffconcat version 1.0\n') list_file.writelines([f"file '{f.name}'\n" for f in segment_files]) - ffmpeg = FFmpeg(enable_log=config.debug) - - ffmpeg.options( - f'-f concat -i "{ffmpeg_list_file}" -c copy "{full_path}"' - ) + args = [ + '-f', + 'concat', + '-i', + str(ffmpeg_list_file), + '-c', + 'copy', + str(full_path), + ] + + try: + run_ffmpeg(args) + + except CalledProcessError as ex: + raise M3U8Error( + f'Error running ffmpeg - exit code {ex.returncode}: {ex.stderr}' + ) finally: #region Clean up diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index a582283..8bfa5ef 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ 
"""Fansly Downloader NG""" -__version__ = '0.7.14' -__date__ = '2024-01-20T19:59:00+01' +__version__ = '0.7.17' +__date__ = '2024-01-22T19:53:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ @@ -110,13 +110,13 @@ def main(config: FanslyConfig) -> int: # M3U8 fixing interim print() - print_warning( - "THIS IS AN EXPERIMENTAL IMPROVED VIDEO DOWNLOAD VERSION -" - f"\n{' '*19} EXISTING VIDEOS WILL BE DOWNLOADED AGAIN/DE-DUPLICATION WILL NOT WORK" - f"\n{' '*19} FOR DOWNLOADS FROM OLDER VERSIONS!!!" - f"\n{' '*19} CTRL+C TO ABORT" + print_info( + "Due to important memory usage and video format bugfixes, " + "existing media items " + f"\n{' '*16} need to be re-hashed (`_hash_` to `_hash1_`)." + f"\n{' '*16} Affected files will automatically be renamed in the background." ) - input_enter_continue(config.interactive) + print() for creator_name in sorted(config.user_names): with Timer(creator_name): diff --git a/fileio/fnmanip.py b/fileio/fnmanip.py index 05c760e..0752c39 100644 --- a/fileio/fnmanip.py +++ b/fileio/fnmanip.py @@ -16,6 +16,8 @@ from download.downloadstate import DownloadState from textio import print_debug, print_error +from .mp4 import hash_mp4file + # turn off for our purpose unnecessary PIL safety features Image.MAX_IMAGE_PIXELS = None @@ -31,7 +33,7 @@ def extract_media_id(filename: str) -> int | None: return None -def extract_hash_from_filename(filename: str) -> str | None: +def extract_old_hash0_from_filename(filename: str) -> str | None: """Extracts the hash from an existing file's name.""" match = re.search(r'_hash_([a-fA-F0-9]+)', filename) @@ -41,10 +43,25 @@ def extract_hash_from_filename(filename: str) -> str | None: return None +def extract_hash_from_filename(filename: str) -> str | None: + """Extracts the hash from an existing file's name.""" + match = re.search(r'_hash1_([a-fA-F0-9]+)', filename) + + if match: + return match.group(1) + + return None + + def 
add_hash_to_filename(filename: Path, file_hash: str) -> str: """Adds a hash to an existing file's name.""" base_name, extension = str(filename.parent / filename.stem), filename.suffix - hash_suffix = f"_hash_{file_hash}{extension}" + old_hash_suffix = f"_hash_{file_hash}{extension}" + hash_suffix = f"_hash1_{file_hash}{extension}" + + # Remove old hash(es) + if extract_old_hash0_from_filename(str(filename)) is not None: + base_name = base_name.split('_hash_')[0] # adjust filename for 255 bytes filename limit, on all common operating systems max_length = 250 @@ -116,13 +133,9 @@ def add_hash_to_other_content(state: DownloadState, filepath: Path, content_form state.recent_audio_hashes.add(existing_hash) else: - h = hashlib.md5() - - with open(filepath, 'rb') as f: - while (part := f.read(1_048_576)): - h.update(part) + algorithm = hashlib.md5() - file_hash = h.hexdigest() + file_hash = hash_mp4file(algorithm, filepath) if content_format == 'video': state.recent_video_hashes.add(file_hash) diff --git a/utils/ffmpeg.py b/utils/ffmpeg.py new file mode 100644 index 0000000..5576abc --- /dev/null +++ b/utils/ffmpeg.py @@ -0,0 +1,25 @@ +"""FFmpeg Launcher Module""" + + +import subprocess + +from pyffmpeg import FFmpeg + + +def run_ffmpeg(args: list[str]) -> bool: + ffmpeg = FFmpeg(enable_log=False) + + ffmpeg_bin = ffmpeg.get_ffmpeg_bin() + + proc_args = [ffmpeg_bin] + + proc_args += args + + result = subprocess.run( + proc_args, + encoding='utf-8', + capture_output=True, + check=True, + ) + + return result.returncode == 0 From b5b349c8428a0a976d3d75fb3b367bef9062491d Mon Sep 17 00:00:00 2001 From: prof79 Date: Mon, 22 Jan 2024 22:09:13 +0100 Subject: [PATCH 06/13] Upped chunk sizes. 
--- download/m3u8.py | 2 +- fansly_downloader_ng.py | 4 ++-- fileio/mp4.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/download/m3u8.py b/download/m3u8.py index 6371df0..2ec27fb 100644 --- a/download/m3u8.py +++ b/download/m3u8.py @@ -71,7 +71,7 @@ def download_m3u8( :return: True if successful or False otherwise. :rtype: bool """ - CHUNK_SIZE = 65_536 + CHUNK_SIZE = 1_048_576 cookies = get_m3u8_cookies(m3u8_url) diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 8bfa5ef..bc954bf 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.17' -__date__ = '2024-01-22T19:53:00+01' +__version__ = '0.7.18' +__date__ = '2024-01-22T22:08:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ diff --git a/fileio/mp4.py b/fileio/mp4.py index e5f9317..fa78c87 100644 --- a/fileio/mp4.py +++ b/fileio/mp4.py @@ -58,7 +58,7 @@ def hash_mp4box(algorithm, reader: BufferedReader, box: MP4Box): `algorithm` must be a `hashlib` algorithm. """ - CHUNK_SIZE = 65536 + CHUNK_SIZE = 1_048_576 reader.seek(box.position, os.SEEK_SET) From 439605ebb7af8e7f062ea6f28c554432a2bec8c9 Mon Sep 17 00:00:00 2001 From: prof79 Date: Fri, 26 Jan 2024 23:38:28 +0100 Subject: [PATCH 07/13] Added online references. 
--- fileio/mp4.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fileio/mp4.py b/fileio/mp4.py index fa78c87..c62bb92 100644 --- a/fileio/mp4.py +++ b/fileio/mp4.py @@ -1,4 +1,10 @@ -"""MPEG-4 Binary File Manipulation""" +"""MPEG-4 Binary File Manipulation + +Kudos to Alfred Gutierrez' (alfg) and Sanjeev Pandey's well-summarizing articles: + +https://dev.to/alfg/a-quick-dive-into-mp4-57fo +https://sanjeev-pandey.medium.com/understanding-the-mpeg-4-moov-atom-pseudo-streaming-in-mp4-93935e1b9e9a +""" __all__ = [ From 23c2f700f69c37d21fdd15f2bae4a8e7649bdd90 Mon Sep 17 00:00:00 2001 From: prof79 Date: Fri, 26 Jan 2024 23:40:06 +0100 Subject: [PATCH 08/13] Excluded binaries. --- .gitignore | 2 ++ fansly_downloader_ng.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9c9f982..2b57c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,5 @@ config.ini *.bak logo*.txt dummy.* +# Linux/macOS binary +fansly-downloader-ng diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index bc954bf..11d0003 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.18' -__date__ = '2024-01-22T22:08:00+01' +__version__ = '0.7.19' +__date__ = '2024-01-26T23:38:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ From 565634d0eb017eaca62cc9a59dc1f6e0263e14ed Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 27 Jan 2024 14:18:56 +0100 Subject: [PATCH 09/13] Improved/fixed existing file code. 
--- fansly_downloader_ng.py | 4 ++-- fileio/fnmanip.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 11d0003..8dc2e37 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.19' -__date__ = '2024-01-26T23:38:00+01' +__version__ = '0.7.20' +__date__ = '2024-01-27T14:18:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ diff --git a/fileio/fnmanip.py b/fileio/fnmanip.py index 0752c39..23058f8 100644 --- a/fileio/fnmanip.py +++ b/fileio/fnmanip.py @@ -97,10 +97,10 @@ def add_hash_to_image(state: DownloadState, filepath: Path): new_filename = add_hash_to_filename(Path(filename), file_hash) new_filepath = filepath.parent / new_filename - filepath = filepath.rename(new_filepath) - - except FileExistsError: - filepath.unlink() + if new_filepath.exists(): + filepath.unlink() + else: + filepath.rename(new_filepath) except Exception: print_error(f"\nError processing image '{filepath}': {traceback.format_exc()}", 15) @@ -146,10 +146,10 @@ def add_hash_to_other_content(state: DownloadState, filepath: Path, content_form new_filename = add_hash_to_filename(Path(filename), file_hash) new_filepath = filepath.parent / new_filename - filepath = filepath.rename(new_filepath) - - except FileExistsError: - filepath.unlink() + if new_filepath.exists(): + filepath.unlink() + else: + filepath = filepath.rename(new_filepath) except Exception: print_error(f"\nError processing {content_format} '{filepath}': {traceback.format_exc()}", 16) From 7c3b10da2f898a8679f5ec9f337602543cf19695 Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 27 Jan 2024 14:46:18 +0100 Subject: [PATCH 10/13] Retries/delay were not honored from config.ini, stupid --- config/args.py | 8 ++++++-- config/config.py | 10 ++++++++++ fansly_downloader_ng.py | 4 ++-- 3 files changed, 18 insertions(+), 
4 deletions(-) diff --git a/config/args.py b/config/args.py index 4b3d680..d29c53d 100644 --- a/config/args.py +++ b/config/args.py @@ -218,7 +218,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( '-tr', '--timeline-retries', required=False, - default=1, + default=None, type=int, dest='timeline_retries', help="Number of retries on empty timelines. Defaults to 1. " @@ -229,7 +229,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( '-td', '--timeline-delay-seconds', required=False, - default=60, + default=None, type=int, dest='timeline_delay_seconds', help="Number of seconds to wait before retrying empty timelines. " @@ -460,6 +460,10 @@ def map_args_to_config(args: argparse.Namespace, config: FanslyConfig) -> None: check_attr(attr_name, attr_name) arg_attribute = getattr(args, attr_name) + if arg_attribute is None: + # No arg given, keep default or config.ini value + continue + int_value = 0 try: diff --git a/config/config.py b/config/config.py index 0ce0e89..473aeef 100644 --- a/config/config.py +++ b/config/config.py @@ -213,6 +213,7 @@ def load_config(config: FanslyConfig) -> None: metadata_handling = config._parser.get(options_section, 'metadata_handling', fallback='Advanced') config.metadata_handling = MetadataHandling(metadata_handling.lower()) + # Booleans config.download_media_previews = config._parser.getboolean(options_section, 'download_media_previews', fallback=True) config.open_folder_when_finished = config._parser.getboolean(options_section, 'open_folder_when_finished', fallback=True) config.separate_messages = config._parser.getboolean(options_section, 'separate_messages', fallback=True) @@ -222,6 +223,12 @@ def load_config(config: FanslyConfig) -> None: config.interactive = config._parser.getboolean(options_section, 'interactive', fallback=True) config.prompt_on_exit = config._parser.getboolean(options_section, 'prompt_on_exit', fallback=True) + # Numbers + config.timeline_retries = config._parser.getint(options_section, 
'timeline_retries', fallback=1) + config.timeline_delay_seconds = config._parser.getint(options_section, 'timeline_delay_seconds', fallback=60) + + #region Renamed Options + # I renamed this to "use_duplicate_threshold" but retain older config.ini compatibility # True, False -> boolean if config._parser.has_option(options_section, 'utilise_duplicate_threshold'): @@ -231,6 +238,7 @@ def load_config(config: FanslyConfig) -> None: else: config.use_duplicate_threshold = config._parser.getboolean(options_section, 'use_duplicate_threshold', fallback=False) + # Renamed this to "use_folder_suffix" # True, False -> boolean if config._parser.has_option(options_section, 'use_suffix'): config.use_folder_suffix = config._parser.getboolean(options_section, 'use_suffix', fallback=True) @@ -240,6 +248,8 @@ def load_config(config: FanslyConfig) -> None: config.use_folder_suffix = config._parser.getboolean(options_section, 'use_folder_suffix', fallback=True) #endregion + + #endregion # Safe to save! :-) save_config_or_raise(config) diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 8dc2e37..bb600c9 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.20' -__date__ = '2024-01-27T14:18:00+01' +__version__ = '0.7.22' +__date__ = '2024-01-27T14:45:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ From bcf66b5d7e0857f025e98e78ba9d163b78dfb0c6 Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 27 Jan 2024 17:04:37 +0100 Subject: [PATCH 11/13] Fixed file-in-use rename bug. 
--- fileio/fnmanip.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fileio/fnmanip.py b/fileio/fnmanip.py index 23058f8..ef5a973 100644 --- a/fileio/fnmanip.py +++ b/fileio/fnmanip.py @@ -88,19 +88,23 @@ def add_hash_to_image(state: DownloadState, filepath: Path): state.recent_photo_hashes.add(existing_hash) else: - with Image.open(filepath) as img: + file_hash = None + with Image.open(filepath) as img: file_hash = str(imagehash.phash(img, hash_size = 16)) - state.recent_photo_hashes.add(file_hash) - - new_filename = add_hash_to_filename(Path(filename), file_hash) - new_filepath = filepath.parent / new_filename + if file_hash is None: + raise RuntimeError('add_hash_to_image: file_hash should not be "None"') + + state.recent_photo_hashes.add(file_hash) + + new_filename = add_hash_to_filename(Path(filename), file_hash) + new_filepath = filepath.parent / new_filename - if new_filepath.exists(): - filepath.unlink() - else: - filepath.rename(new_filepath) + if new_filepath.exists(): + filepath.unlink() + else: + filepath.rename(new_filepath) except Exception: print_error(f"\nError processing image '{filepath}': {traceback.format_exc()}", 15) From 0334fd458d312e2e89db4f864a7b79a7f848d0f4 Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 27 Jan 2024 17:07:36 +0100 Subject: [PATCH 12/13] Version bump & doc for v0.7.23. --- README.md | 29 +++++++++++++++++++++++++---- ReleaseNotes.md | 27 +++++++++++++++++++++++++++ fansly_downloader_ng.py | 5 +++-- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ad3da26..a693824 100644 --- a/README.md +++ b/README.md @@ -115,11 +115,32 @@ This is a rewrite/refactoring of [Avnsx](https://github.com/Avnsx)'s original [F ## 📰 What's New (Release Notes) -### v0.7.10 2024-01-05 +### v0.7.23 2024-01-27 -Binary release fixing the [missing media downloads issue #3](../../issues/3). Thanks to all participants! -Also fixes a statistics message counting bug. 
-Summary release for v0.7.7-v0.7.9, no code changes in this one.
+Video Fix Edition
+
+This version fixes some grave bugs in regard to video downloading:
+
+* Ludicrous memory usage, whole MPEG-4 files were buffered to RAM using up to several gigabytes ([#8](../../issues/8))
+* Manual re-muxing of MPEG streams which a) caused incompatibilities with certain media ([#9](../../issues/9)) and b) could also lead to malformed MPEG-4 files
+* Hashing video files is tricky and broke due to the fix for ([#9](../../issues/9)) but was bound to unnoticeably break in the future anyway, like a timebomb
+
+As a side effect, existing files will be re-hashed and now have a `_hash1_` part instead of `_hash_`. The front remains the same. Sorry for the inconvenience. I also have plans for a new (opt-in) shorter naming scheme using a checksum probably but that's a story for another day.
+
+Along the way I also fixed a configuration file issue where timeline settings were not honored and a file-rename bug.
+
+Long read:
+
+Video files are actually split into chunks of several MPEG-TS streams in varying resolutions and a web video player can decide what to load in (adaptive streaming, DASH, whatever technology and naming). It is common to have such info in playlists using a text format called `M3U8`. So to get an MPEG-4 out of this you need to take the playlist with the highest resolution, fetch all MPEG-TS streams and merge them into an MPEG-4 file. This should be done by software written by video experts who know the standards, not by hand; Avnsx, for whatever reason, decided to re-mux the streams not only on-the-fly in RAM but also fixing DTS packet sequences by hand. People with some tech knowledge can see what all could go and went wrong with this and how I might feel about that.
+
+First, all streams (`.ts`) must be downloaded to disk first instead of buffering all to RAM.
Second, regarding concatenation/merging a web search usually ends up with the go-to tool for manipulation of audio and video files - `ffmpeg`. Thus I ended up using `pyffmpeg` which is platform-independent and downloads an appropriate `ffmpeg` binary to help with re-encoding tasks. The lib misses some fixes regarding Linux support - but I could easily launch `ffmpeg` with appropriate arguments by hand. I then use the "demuxer" concat protocol of `ffmpeg` using a concat file (that gets deleted afterwards) to properly merge all streams into an MPEG-4 file, using copy-encoding, with proper timing info and no artifacts (except the original already had problems). This results in a structurally clean `.mp4`. + +Merging (concatenating) to a proper MPEG-4 file makes the file look totally different at first glance. Two vids downloaded with the old and new methods differ in file sizes and metadata info like bitrate and duration although they are essentially the same content-wise. What is more, I also discovered that all `libav*`-based software like `ffmpeg` and `PyAV` write the framework's version number into the user metadata portion of the `.mp4`. That's the timebomb I referred to, upgrade to a new library and files that would be the same suddenly differ. + +Using some online articles about the essentials of the MPEG-4 format I devised a new hashing method for `.mp4` files: I exclude the so-called `moov` and `mdat` boxes (or atoms) which essentially include all varying header data/metadata like bitrate, duration and so on and also have user data (`udta`) with the `Lavf` version as a sub-part. I'm no MPEG-4 expert at all so hopefully I haven't missed something essential here - but from my tests this works beautifully. The bytes of the audio-video-content itself are the same so they hash the same 🙂. 
+However, since there is no way to distinguish old-style from new-style hashed files I had to introduce a marker, like a version number, `_hash1_` - and re-hash all existing old-version files on program launch including images. Although image hashing has not changed, differentiating here would have only led to a buggy, unintelligible mess.
+
+Obviously, if a creator re-encoded existing material then the file will be totally different from a binary perspective - even though it may optically check out the same as a previous release; this would require something like a "perceptive hash" - but I still have doubts of that tech probably being too vague - and thus missing content. Therefore, after testing, I might remove pHashing from images in the future.
 
 For more details and history see: **[Release Notes](ReleaseNotes.md)**
 
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 36db462..3e31151 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -2,6 +2,33 @@
 
 ## 🗒️ Release Notes
 
+### v0.7.23 2024-01-27
+
+Video Fix Edition
+
+This version fixes some grave bugs in regard to video downloading:
+
+* Ludicrous memory usage, whole MPEG-4 files were buffered to RAM using up to several gigabytes ([#8](../../issues/8))
+* Manual re-muxing of MPEG streams which a) caused incompatibilities with certain media ([#9](../../issues/9)) and b) could also lead to malformed MPEG-4 files
+* Hashing video files is tricky and broke due to the fix for ([#9](../../issues/9)) but was bound to unnoticeably break in the future anyway, like a timebomb
+
+As a side effect, existing files will be re-hashed and now have a `_hash1_` part instead of `_hash_`. The front remains the same. Sorry for the inconvenience. I also have plans for a new (opt-in) shorter naming scheme using a checksum probably but that's a story for another day.
+
+Along the way I also fixed a configuration file issue where timeline settings were not honored and a file-rename bug.
+ +Long read: + +Video files are actually split into chunks of several MPEG-TS streams in varying resolutions and a web video player can decide what to load in (adaptive streaming, DASH, whatever technology and naming). It is common to have such info in playlists using a text format called `M3U8`. So to get an MPEG-4 out of this you need to take the playlist with the highest resolution, fetch all MPEG-TS streams and merge them into an MPEG-4 file. This should be done by software written by video experts who know the standards, not by hand; Avnsx, for whatever reason, decided to re-mux the streams not only on-the-fly in RAM but also fixing DTS packet sequences by hand. People with some tech knowledge can see what all could go and went wrong with this and how I might feel about that. + +First, all streams (`.ts`) must be downloaded to disk first instead of buffering all to RAM. Second, regarding concatenation/merging a web search usually ends up with the go-to tool for manipulation of audio and video files - `ffmpeg`. Thus I ended up using `pyffmpeg` which is platform-independent and downloads an appropriate `ffmpeg` binary to help with re-encoding tasks. The lib misses some fixes regarding Linux support - but I could easily launch `ffmpeg` with appropriate arguments by hand. I then use the "demuxer" concat protocol of `ffmpeg` using a concat file (that gets deleted afterwards) to properly merge all streams into an MPEG-4 file, using copy-encoding, with proper timing info and no artifacts (except the original already had problems). This results in a structurally clean `.mp4`. + +Merging (concatenating) to a proper MPEG-4 file makes the file look totally different at first glance. Two vids downloaded with the old and new methods differ in file sizes and metadata info like bitrate and duration although they are essentially the same content-wise. 
What is more, I also discovered that all `libav*`-based software like `ffmpeg` and `PyAV` write the framework's version number into the user metadata portion of the `.mp4`. That's the timebomb I referred to, upgrade to a new library and files that would be the same suddenly differ. + +Using some online articles about the essentials of the MPEG-4 format I devised a new hashing method for `.mp4` files: I exclude the so-called `moov` and `mdat` boxes (or atoms) which essentially include all varying header data/metadata like bitrate, duration and so on and also have user data (`udta`) with the `Lavf` version as a sub-part. I'm no MPEG-4 expert at all so hopefully I haven't missed something essential here - but from my tests this works beautifully. The bytes of the audio-video-content itself are the same so they hash the same 🙂. +However, since there is no way to distinguish old-style from new-style hashed files I had to introduce a marker, like a version number, `_hash1_` - and re-hash all existing old-version files on program launch including images. Although image hashing has not changed, differentiating here would have only led to a buggy, unintelligible mess. + +Obviously, if a creator re-encoded existing material then the file will be totally different from a binary perspective - even though it may optically check out the same as a previous release; this would require something like a "perceptive hash" - but I still have doubts of that tech probably being too vague - and thus missing content. Therefore, after testing, I might remove pHashing from images in the future. + ### v0.7.10 2024-01-05 Binary release fixing the [missing media downloads issue #3](../../issues/3). Thanks to all participants! 
diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index bb600c9..4a3674d 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.22' -__date__ = '2024-01-27T14:45:00+01' +__version__ = '0.7.23' +__date__ = '2024-01-27T17:06:00+01' __maintainer__ = 'prof79' __copyright__ = f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [ @@ -18,6 +18,7 @@ 'KasumiDev', ] +# TODO: Remove pyffmpeg's "Github Activeness" message # TODO: Fix in future: audio needs to be properly transcoded from mp4 to mp3, instead of just saved as # TODO: Rate-limiting fix works but is terribly slow - would be nice to know how to interface with Fansly API properly # TODO: Check whether messages are rate-limited too or not From 21a5db96f4a6186eb7f6e9f4e9ecbe1708940678 Mon Sep 17 00:00:00 2001 From: prof79 Date: Sat, 27 Jan 2024 17:11:51 +0100 Subject: [PATCH 13/13] Changed to v0.8.0 due to major and somewhat breaking changes. --- README.md | 2 +- ReleaseNotes.md | 2 +- fansly_downloader_ng.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index a693824..fd5e475 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ This is a rewrite/refactoring of [Avnsx](https://github.com/Avnsx)'s original [F ## 📰 What's New (Release Notes) -### v0.7.23 2024-01-27 +### v0.8.0 2024-01-27 Video Fix Edition diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 3e31151..88c52b4 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -2,7 +2,7 @@ ## 🗒️ Release Notes -### v0.7.23 2024-01-27 +### v0.8.0 2024-01-27 Video Fix Edition diff --git a/fansly_downloader_ng.py b/fansly_downloader_ng.py index 4a3674d..8060e01 100644 --- a/fansly_downloader_ng.py +++ b/fansly_downloader_ng.py @@ -2,8 +2,8 @@ """Fansly Downloader NG""" -__version__ = '0.7.23' -__date__ = '2024-01-27T17:06:00+01' +__version__ = '0.8.0' +__date__ = '2024-01-27T17:10:00+01' __maintainer__ = 'prof79' __copyright__ 
= f'Copyright (C) 2023-2024 by {__maintainer__}' __authors__ = [