From 6a54d249881976ea54b0aa1cb400cfbe5baca5dd Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Thu, 16 Nov 2023 12:53:20 +0100 Subject: [PATCH 1/9] Updated to MediaCatch s2t api v2 --- src/mediacatch_s2t/__init__.py | 31 ++- src/mediacatch_s2t/__main__.py | 4 +- src/mediacatch_s2t/uploader.py | 376 +++++++-------------------------- 3 files changed, 89 insertions(+), 322 deletions(-) diff --git a/src/mediacatch_s2t/__init__.py b/src/mediacatch_s2t/__init__.py index db530df..9f8a074 100644 --- a/src/mediacatch_s2t/__init__.py +++ b/src/mediacatch_s2t/__init__.py @@ -3,29 +3,22 @@ """ # Version of the mc-s2t-mediacatch_s2t -__version__ = '1.1.0' +__version__ = '2.0.0' import os URL: str = os.environ.get('MEDIACATCH_URL', 'https://s2t.mediacatch.io') -SINGLE_UPLOAD_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_PRESIGN_ENDPOINT', - '/presigned-post-url') -MULTIPART_UPLOAD_CREATE_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_MULTIPART_UPLOAD_CREATE_ENDPOINT', - '/multipart-upload/id') -MULTIPART_UPLOAD_URL_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_MULTIPART_UPLOAD_URL_ENDPOINT', - '/multipart-upload/url') -MULTIPART_UPLOAD_COMPLETE_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_MULTIPART_UPLOAD_COMPLETE_ENDPOINT', - '/multipart-upload/complete') -UPDATE_STATUS_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_UPDATE_STATUS_ENDPOINT', - '/upload-completed') +UPLOAD_CREATE_ENDPOINT: str = os.environ.get( + 'MEDIACATCH_UPLOAD_CREATE_ENDPOINT', + '/upload/') +UPLOAD_URL_ENDPOINT: str = os.environ.get( + 'MEDIACATCH_UPLOAD_URL_ENDPOINT', + '/upload/{file_id}/{part_number}') +UPLOAD_COMPLETE_ENDPOINT: str = os.environ.get( + 'MEDIACATCH_UPLOAD_COMPLETE_ENDPOINT', + '/upload/{file_id}/complete') TRANSCRIPT_ENDPOINT: str = os.environ.get( - 'MEDIACATCH_TRANSCRIPT_ENDPOINT', '/result') -PROCESSING_TIME_RATIO: float = 0.1 -MULTIPART_FILESIZE: int = 1 * 1024 * 1024 * 1024 + 'MEDIACATCH_TRANSCRIPT_ENDPOINT', + '/result/{file_id}') ENABLE_AUTOMATIC_UPDATE: bool = True diff --git a/src/mediacatch_s2t/__main__.py b/src/mediacatch_s2t/__main__.py index 37505bb..5059bf4 100644 --- a/src/mediacatch_s2t/__main__.py +++ b/src/mediacatch_s2t/__main__.py @@ -13,13 +13,13 @@ def main() -> None: ) parser.add_argument("api_key", type=str, help="MediaCatch API key.") parser.add_argument("file", type=str, help="A media file.") - parser.add_argument("--language", type=str, default='da', help="The main language in the file.") + parser.add_argument("--quota", type=str, default='any', help="The main language in the file.") args = parser.parse_args() console = Console() with console.status( "[bold green]Uploading file to MediaCatch..."): - result = uploader.upload_and_get_transcription(args.file, args.api_key, args.language) + result = uploader.upload_and_get_transcription(args.file, args.api_key, args.quota) if result['status'] == 'error': sys.exit( f"Error occurred:\n{result['message']}. " diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index eefcc27..b3e4924 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -1,31 +1,17 @@ -import abc import os -import pathlib -import threading - import requests -import subprocess -import json -from typing import NamedTuple - -from langcodes import standardize_tag +import threading +from abc import ABC +from pathlib import Path from mediacatch_s2t import ( URL, - SINGLE_UPLOAD_ENDPOINT, TRANSCRIPT_ENDPOINT, UPDATE_STATUS_ENDPOINT, - MULTIPART_UPLOAD_CREATE_ENDPOINT, MULTIPART_UPLOAD_URL_ENDPOINT, - MULTIPART_UPLOAD_COMPLETE_ENDPOINT, - PROCESSING_TIME_RATIO, MULTIPART_FILESIZE + TRANSCRIPT_ENDPOINT, + UPLOAD_CREATE_ENDPOINT, UPLOAD_URL_ENDPOINT, UPLOAD_COMPLETE_ENDPOINT ) from mediacatch_s2t.helper import update_myself -class FFProbeResult(NamedTuple): - return_code: int - json: str - error: str - - class UploaderException(Exception): message = "Error from uploader module" @@ -39,36 +25,35 @@ def __str__(self): return self.message -class UploaderBase(metaclass=abc.ABCMeta): - def __init__(self, file, api_key, language='da'): - self.file = file - self.api_key = api_key - self.language = standardize_tag(language) - self.file_id = None - - def _is_file_exist(self): - return pathlib.Path(self.file).is_file() +class UploaderBase(ABC): + def __init__(self) -> None: + super().__init__() + self.result = { + "url": "", + "status": "", + "estimated_processing_time": 0, + "message": "" + } - def is_multipart_upload(self) -> bool: - if self._is_file_exist(): - filesize = os.path.getsize(self.file) - if filesize > MULTIPART_FILESIZE: - return True - return False + def _get_headers(self) -> dict: + return { + "Content-type": "application/json", + "X-API-KEY": self.api_key, + "X-Quota": self.quota + } - def _is_response_error(self, response): - if response.status_code >= 400: - if response.status_code == 401: - return True, response.json()['message'] - return True, response.json()['message'] - return False, '' + def _is_file_exist(self): + return self.file.is_file() - def _make_post_request(self, *args, **kwargs): + def _make_request(self, type, *args, **kwargs): """Make post request with retry mechanism.""" call_limit = 3 is_error, msg = True, "Have not made a request call." for _call in range(call_limit): - response = requests.post(*args, **kwargs) + if type == 'get': + response = requests.get(*args, **kwargs) + elif type == 'post': + response = requests.post(*args, **kwargs) is_error, msg = self._is_response_error(response) if not is_error: break @@ -80,239 +65,57 @@ def _make_post_request(self, *args, **kwargs): f"Error during post request {url}; {msg}" ) return response + + def _is_response_error(self, response): + if response.status_code >= 400: + if response.status_code == 401: + return True, response.json()['detail'] + return True, response.json()['detail'] + return False, '' - @property - def _transcript_link(self): - return f"{URL}{TRANSCRIPT_ENDPOINT}?id={self.file_id}&api_key={self.api_key}" - - @staticmethod - def _ffprobe(file_path) -> FFProbeResult: - command_array = ["ffprobe", - "-v", "quiet", - "-print_format", "json", - "-show_format", - "-show_streams", - file_path] - result = subprocess.run(command_array, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True) - return FFProbeResult(return_code=result.returncode, - json=json.loads(result.stdout), - error=result.stderr) - - def get_duration(self): - """Get audio track duration of a file. - - :return - tuple: (duration_in_miliseconds, stream_json | error_msg) - """ - try: - probe = self._ffprobe(self.file) - except OSError as e: - return 0, 'FFmpeg not installed (sudo apt install ffmpeg)' - if probe.return_code: - return 0, probe.error - else: - try: - for stream in probe.json['streams']: - if stream['codec_type'] == 'audio': - return int(float(stream['duration']) * 1000), stream - else: - return 0, "The file doesn't have an audio track" - except Exception: - if 'duration' in probe.json['format']: - return int(float(probe.json['format']['duration']) * 1000), probe.json['format'] - else: - return 0, "Duration couldn't be found for audio track" - - def estimated_result_time(self, audio_length=0): - """Estimated processing time in seconds""" - - if not isinstance(audio_length, int): - return 0 - processing_time = PROCESSING_TIME_RATIO * audio_length - return round(processing_time / 1000) - - def _post_file(self, url, data): - with open(self.file, 'rb') as f: - response = self._make_post_request( - url, - data=data, - files={'file': f} - ) - return response - - def _get_transcript_link(self): - self._make_post_request( - url=f'{URL}{UPDATE_STATUS_ENDPOINT}', - json={"id": self.file_id}, - headers={ - "Content-type": 'application/json', - "X-API-KEY": self.api_key, - "X-LANG": self.language - } - ) - return self._transcript_link - - @abc.abstractmethod - def upload_file(self): - result = { - "url": "", - "status": "uploaded", - "estimated_processing_time": 0, - "message": "The file has been uploaded." - } - return result - + def _set_result_error_message(self, msg) -> None: + self.result["status"] = "error" + self.result["message"] = msg class Uploader(UploaderBase): - """Uploader Class - - This class is to send a file to the API server. - The API server currently only allows file less than 4gb - to be sent with this upload class. - """ - - def _get_upload_url(self, mime_file): - response = self._make_post_request( - url=f'{URL}{SINGLE_UPLOAD_ENDPOINT}', - json=mime_file, - headers={ - "Content-type": 'application/json', - "X-API-KEY": self.api_key, - "X-LANG": self.language - } - ) - response_data = json.loads(response.text) - url = response_data.get('url') - data = response_data.get('fields') - _id = response_data.get('id') - return { - "url": url, - "fields": data, - "id": _id - } - - def upload_file(self): - result = { - "url": "", - "status": "", - "estimated_processing_time": 0, - "message": "" - } - if not self._is_file_exist(): - result["status"] = "error" - result["message"] = "The file doesn't exist" - return result - - file_duration, msg = self.get_duration() - if not file_duration: - result["status"] = "error" - result["message"] = msg - return result - - mime_file = { - "duration": file_duration, - "filename": pathlib.Path(self.file).name, - "file_ext": pathlib.Path(self.file).suffix, - "filesize": os.path.getsize(self.file), - "language": self.language, - } - try: - _upload_url = self._get_upload_url(mime_file) - url = _upload_url.get('url') - data = _upload_url.get('fields') - self.file_id = _upload_url.get('id') - - self._post_file(url, data) - transcript_link = self._get_transcript_link() - except UploaderException as e: - result["status"] = "error" - result["message"] = str(e) - return result - - result = { - "url": transcript_link, - "status": "uploaded", - "estimated_processing_time": self.estimated_result_time( - file_duration), - "message": "The file has been uploaded." - } - return result - + def __init__(self, file, api_key, quota) -> None: + super().__init__() + self.file = Path(file) -class ChunkedFileUploader(UploaderBase): - """Multipart Uploader Class + self.file_name = self.file.name + self.file_extension = self.file.suffix + self.file_size = os.path.getsize(self.file) - This class is to split a bigfile into chunked files, and send them - with multipart upload method. - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.filename = pathlib.Path(self.file).name - self.file_ext = pathlib.Path(self.file).suffix - self.filesize = os.path.getsize(self.file) + self.api_key = api_key + self.quota = quota self.file_id: str = "" self.chunk_maxsize: int = 0 - self.total_chunks: int = 0 self.upload_id: str = "" - self.endpoint_create: str = f"{URL}{MULTIPART_UPLOAD_CREATE_ENDPOINT}" - self.endpoint_signed_url: str = f"{URL}{MULTIPART_UPLOAD_URL_ENDPOINT}" - self.endpoint_complete: str = f"{URL}{MULTIPART_UPLOAD_COMPLETE_ENDPOINT}" + self.endpoint_create: str = f"{URL}{UPLOAD_CREATE_ENDPOINT}" + self.endpoint_signed_url: str = f"{URL}{UPLOAD_URL_ENDPOINT}" + self.endpoint_complete: str = f"{URL}{UPLOAD_COMPLETE_ENDPOINT}" self.headers: dict = self._get_headers() self.etags: list = [] - self.result = { - "url": "", - "status": "", - "estimated_processing_time": 0, - "message": "" - } - - def _get_headers(self) -> dict: - return { - "Content-type": "application/json", - "X-API-KEY": self.api_key, - "X-LANG": self.language - } - - def _set_result_error_message(self, msg) -> None: - self.result["status"] = "error" - self.result["message"] = msg - - def _set_metadata(self, file_id: str, chunk_maxsize: int, - total_chunks: int, upload_id: str) -> None: - self.file_id = file_id - self.chunk_maxsize = chunk_maxsize - self.total_chunks = total_chunks - self.upload_id = upload_id - return None - - def create_multipart_upload(self, mime_file: dict) -> dict: - response = self._make_post_request( + def start_file_upload(self, mime_file: dict) -> dict: + response = self._make_request( + type='post', url=self.endpoint_create, headers=self.headers, json=mime_file ) data: dict = response.json() - return { - "chunk_maxsize": data["chunk_maxsize"], - "file_id": data["file_id"], - "total_chunks": data["total_chunks"], - "upload_id": data["upload_id"] - } + return data["file_id"] def chop_and_upload_chunk(self) -> None: threads = [] with open(self.file, 'rb') as f: - part_number = 0 + part_number = 1 while True: - part_number += 1 - chunk_size = self.chunk_maxsize + chunk_size = 100 * 1024 * 1024 chunk = f.read(chunk_size) if not chunk: break @@ -320,42 +123,32 @@ def chop_and_upload_chunk(self) -> None: args=(part_number, chunk)) threads.append(thread) thread.start() + part_number += 1 for thread in threads: thread.join() return None - def _get_signed_url(self, part_number: int) -> str: - response = self._make_post_request( - url=self.endpoint_signed_url, - headers=self.headers, - json={ - "file_id": self.file_id, - "upload_id": self.upload_id, - "part_number": part_number - } - ) - data: dict = response.json() - return data["url"] - def _upload_data_chunk_to_bucket(self, url: str, file_data: bytes) -> str: response: requests.Response = requests.put(url=url, data=file_data) etag: str = response.headers['ETag'] return etag def upload_part(self, part_number: int, file_data: bytes) -> None: - url = self._get_signed_url(part_number) - etag = self._upload_data_chunk_to_bucket(url, file_data) - self.etags.append({'ETag': etag, 'PartNumber': part_number}) + response = self._make_request( + type='get', + url=self.endpoint_signed_url.format(file_id=self.file_id, part_number=part_number), + headers=self.headers, + ).json() + etag = self._upload_data_chunk_to_bucket(response['url'], file_data) + self.etags.append({'e_tag': etag, 'part_number': part_number}) return None def complete_the_upload(self) -> bool: - response: requests.Response = self._make_post_request( - url=self.endpoint_complete, + response: requests.Response = self._make_request( + type='post', + url=self.endpoint_complete.format(file_id=self.file_id), headers=self.headers, - json={ - "file_id": self.file_id, - "parts": self.etags - } + json={"parts": self.etags} ) if response.status_code != 201: return False @@ -366,50 +159,31 @@ def upload_file(self): self._set_result_error_message("The file doesn't exist") return self.result - file_duration, msg = self.get_duration() - if not file_duration: - self._set_result_error_message(msg) - return self.result - mime_file = { - "duration": file_duration, - "filename": self.filename, - "file_ext": self.file_ext, - "filesize": self.filesize, - "language": self.language, + "file_name": self.file_name, + "file_extension": self.file_extension, + "file_size": self.file_size, + "quota": self.quota, } try: - meta = self.create_multipart_upload(mime_file) - self._set_metadata( - file_id=meta["file_id"], - chunk_maxsize=meta["chunk_maxsize"], - total_chunks=meta["total_chunks"], - upload_id=meta["upload_id"] - ) + self.file_id = self.start_file_upload(mime_file) self.chop_and_upload_chunk() self.complete_the_upload() - transcript_link = self._get_transcript_link() except Exception as e: self._set_result_error_message(str(e)) return self.result self.result = { - "url": transcript_link, + "url": URL + TRANSCRIPT_ENDPOINT.format(file_id=self.file_id), "status": "uploaded", - "estimated_processing_time": self.estimated_result_time( - file_duration), + "estimated_processing_time": 0, "message": "The file has been uploaded." } return self.result -def upload_and_get_transcription(file, api_key, language) -> dict: - is_multipart_upload: bool = Uploader( - file, api_key, language).is_multipart_upload() - if is_multipart_upload: - result: dict = ChunkedFileUploader(file, api_key, language).upload_file() - else: - result: dict = Uploader(file, api_key, language).upload_file() +def upload_and_get_transcription(file, api_key, quota) -> dict: + result: dict = Uploader(file, api_key, quota).upload_file() update_myself() return result From 542161d141d63a153f282bfcf1c1e9d34db01aad Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Thu, 23 Nov 2023 10:33:30 +0100 Subject: [PATCH 2/9] Change file_size to kB --- src/mediacatch_s2t/uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index b3e4924..d9de159 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -84,7 +84,7 @@ def __init__(self, file, api_key, quota) -> None: self.file_name = self.file.name self.file_extension = self.file.suffix - self.file_size = os.path.getsize(self.file) + self.file_size = os.path.getsize(self.file) // 1024 self.api_key = api_key self.quota = quota From 4e50b1dea6d9fd5e58114dfbc3396d836e4eb804 Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Wed, 13 Dec 2023 13:05:07 +0100 Subject: [PATCH 3/9] Remove file_size from request --- src/mediacatch_s2t/uploader.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index d9de159..d7c084b 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -84,7 +84,6 @@ def __init__(self, file, api_key, quota) -> None: self.file_name = self.file.name self.file_extension = self.file.suffix - self.file_size = os.path.getsize(self.file) // 1024 self.api_key = api_key self.quota = quota @@ -162,7 +161,6 @@ def upload_file(self): mime_file = { "file_name": self.file_name, "file_extension": self.file_extension, - "file_size": self.file_size, "quota": self.quota, } try: From 5d9a256be9d7528982de962cd17e5cecc8927c55 Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Wed, 13 Dec 2023 13:05:39 +0100 Subject: [PATCH 4/9] Removed unused import --- src/mediacatch_s2t/uploader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index d7c084b..3ae70b6 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -1,4 +1,3 @@ -import os import requests import threading from abc import ABC From d2d3d370ddd2bdd79608ea74393d4e1fcfae577e Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Thu, 21 Dec 2023 10:45:53 +0100 Subject: [PATCH 5/9] Refactored uploader code and made quota optional --- pyproject.toml | 2 +- src/mediacatch_s2t/__init__.py | 2 +- src/mediacatch_s2t/__main__.py | 2 +- src/mediacatch_s2t/uploader.py | 336 ++++++++++++++++++--------------- 4 files changed, 192 insertions(+), 150 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b106567..82d68a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mediacatch-s2t" -version = '1.1.0' +version = '2.0.0' description = "Upload a media file and get the transcription link." readme = "README.md" authors = [{ name = "MediaCatch", email = "support@mediacatch.io" }] diff --git a/src/mediacatch_s2t/__init__.py b/src/mediacatch_s2t/__init__.py index 9f8a074..8c59d81 100644 --- a/src/mediacatch_s2t/__init__.py +++ b/src/mediacatch_s2t/__init__.py @@ -7,7 +7,7 @@ import os -URL: str = os.environ.get('MEDIACATCH_URL', 'https://s2t.mediacatch.io') +URL: str = os.environ.get('MEDIACATCH_URL', 'https://s2t.mediacatch.io/api/v2') UPLOAD_CREATE_ENDPOINT: str = os.environ.get( 'MEDIACATCH_UPLOAD_CREATE_ENDPOINT', '/upload/') diff --git a/src/mediacatch_s2t/__main__.py b/src/mediacatch_s2t/__main__.py index 5059bf4..a632b9c 100644 --- a/src/mediacatch_s2t/__main__.py +++ b/src/mediacatch_s2t/__main__.py @@ -13,7 +13,7 @@ def main() -> None: ) parser.add_argument("api_key", type=str, help="MediaCatch API key.") parser.add_argument("file", type=str, help="A media file.") - parser.add_argument("--quota", type=str, default='any', help="The main language in the file.") + parser.add_argument("--quota", type=str, default=None, help="The quota to bill usage to. Defaults to None.") args = parser.parse_args() console = Console() diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index 3ae70b6..dca4cc8 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -1,7 +1,7 @@ import requests -import threading -from abc import ABC +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path +from typing import Dict, Optional, Generator from mediacatch_s2t import ( URL, @@ -12,175 +12,217 @@ class UploaderException(Exception): - message = "Error from uploader module" + """Custom exception class for handling errors within the Uploader class. + + Attributes: + message (str): The error message to be displayed. + cause (Exception, optional): The original exception that caused this error, if any. + """ - def __init__(self, cause=None): - self.cause = cause + def __init__(self, message: str, cause: Optional[Exception] = None) -> None: + super().__init__(f"{message}: {str(cause)}" if cause else message) - def __str__(self): - if self.cause: - return "{}: {}".format(self.message, str(self.cause)) - else: - return self.message +class Uploader: + """Handles the uploading of files to a server and manages the file upload process. -class UploaderBase(ABC): - def __init__(self) -> None: - super().__init__() - self.result = { - "url": "", - "status": "", - "estimated_processing_time": 0, - "message": "" - } + Attributes: + file_path (Path): Path of the file to be uploaded. + api_key (str): API key for authentication. + quota (int): Quota limit for the user. + file_id (str): Unique identifier for the file once uploaded. + etags (list): List of ETag values for each uploaded chunk. + endpoints (dict): Endpoints for file creation, signed URL generation, and completion. + headers (dict): Headers to be used for HTTP requests. + """ - def _get_headers(self) -> dict: - return { - "Content-type": "application/json", - "X-API-KEY": self.api_key, - "X-Quota": self.quota - } + CHUNK_SIZE = 100 * 1024 * 1024 # 100 MB + REQUEST_RETRY_LIMIT = 3 - def _is_file_exist(self): - return self.file.is_file() - - def _make_request(self, type, *args, **kwargs): - """Make post request with retry mechanism.""" - call_limit = 3 - is_error, msg = True, "Have not made a request call." - for _call in range(call_limit): - if type == 'get': - response = requests.get(*args, **kwargs) - elif type == 'post': - response = requests.post(*args, **kwargs) - is_error, msg = self._is_response_error(response) - if not is_error: - break - if is_error: - url = kwargs.get('url') - if not url: - url, *rest = args - raise UploaderException( - f"Error during post request {url}; {msg}" - ) - return response - - def _is_response_error(self, response): - if response.status_code >= 400: - if response.status_code == 401: - return True, response.json()['detail'] - return True, response.json()['detail'] - return False, '' - - def _set_result_error_message(self, msg) -> None: - self.result["status"] = "error" - self.result["message"] = msg - -class Uploader(UploaderBase): - def __init__(self, file, api_key, quota) -> None: - super().__init__() - self.file = Path(file) - - self.file_name = self.file.name - self.file_extension = self.file.suffix + def __init__(self, file: str, api_key: str, quota: int, max_threads: int = 5) -> None: + self.file_path = Path(file) + if not self.file_path.is_file(): + raise FileNotFoundError(f"The file {file} does not exist") self.api_key = api_key self.quota = quota + self.file_id = "" + self.etags = [] + self.endpoints = { + "create": f"{URL}{UPLOAD_CREATE_ENDPOINT}", + "signed_url": f"{URL}{UPLOAD_URL_ENDPOINT}", + "complete": f"{URL}{UPLOAD_COMPLETE_ENDPOINT}", + "result": f"{URL}{TRANSCRIPT_ENDPOINT}" + } + self.headers = { + "Content-type": "application/json", + "X-API-KEY": self.api_key, + "X-Quota": str(self.quota) + } + self.max_threads = max_threads - self.file_id: str = "" - self.chunk_maxsize: int = 0 - self.upload_id: str = "" + def upload_file(self) -> Dict[str, str]: + """Initiates and manages the file upload process. - self.endpoint_create: str = f"{URL}{UPLOAD_CREATE_ENDPOINT}" - self.endpoint_signed_url: str = f"{URL}{UPLOAD_URL_ENDPOINT}" - self.endpoint_complete: str = f"{URL}{UPLOAD_COMPLETE_ENDPOINT}" - self.headers: dict = self._get_headers() + Returns: + Dict[str, str]: A dictionary containing the result of the upload process. + """ + try: + self.file_id = self.initiate_file_upload() + self.upload_file_chunks() + self.finalize_upload() + return self.get_upload_result() + except Exception as e: + raise UploaderException("Failed to upload file", e) - self.etags: list = [] + def initiate_file_upload(self) -> str: + """Starts the file upload process by creating a new upload session on the server. - def start_file_upload(self, mime_file: dict) -> dict: - response = self._make_request( - type='post', - url=self.endpoint_create, - headers=self.headers, - json=mime_file + Returns: + str: The file ID assigned by the server. + """ + mime_file = { + "file_name": self.file_path.name, + "file_extension": self.file_path.suffix, + "quota": self.quota, + } + response = self._make_request('post', self.endpoints['create'], json=mime_file) + return response.json()["file_id"] + + def upload_file_chunks(self) -> None: + """Splits the file into chunks and uploads each chunk to the server in parallel.""" + with ThreadPoolExecutor(max_workers=self.max_threads) as executor, self.file_path.open('rb') as file: + futures = {executor.submit(self.upload_chunk, part_number, chunk): part_number + for part_number, chunk in enumerate(self._read_file_in_chunks(file), start=1)} + + for future in as_completed(futures): + part_number = futures[future] + try: + future.result() + except Exception as e: + print(f"Chunk {part_number} failed to upload due to: {e}") + + def _read_file_in_chunks(self, file) -> Generator[bytes, None, None]: + """Generator that reads the file in chunks. + + Args: + file (IO[bytes]): File object opened in binary read mode. + + Yields: + bytes: A chunk of the file. + """ + while True: + chunk = file.read(self.CHUNK_SIZE) + if not chunk: + break + yield chunk + + def upload_chunk(self, part_number: int, chunk: bytes) -> None: + """Uploads a single chunk of the file to the server. + + Args: + part_number (int): The part number of the chunk in the sequence. + chunk (bytes): The file data to upload. + """ + signed_url_response = self._make_request( + 'get', + self.endpoints['signed_url'].format(file_id=self.file_id, part_number=part_number) ) - data: dict = response.json() - return data["file_id"] - - def chop_and_upload_chunk(self) -> None: - threads = [] - with open(self.file, 'rb') as f: - part_number = 1 - while True: - chunk_size = 100 * 1024 * 1024 - chunk = f.read(chunk_size) - if not chunk: - break - thread = threading.Thread(target=self.upload_part, - args=(part_number, chunk)) - threads.append(thread) - thread.start() - part_number += 1 - for thread in threads: - thread.join() - return None - - def _upload_data_chunk_to_bucket(self, url: str, file_data: bytes) -> str: - response: requests.Response = requests.put(url=url, data=file_data) - etag: str = response.headers['ETag'] - return etag - - def upload_part(self, part_number: int, file_data: bytes) -> None: - response = self._make_request( - type='get', - url=self.endpoint_signed_url.format(file_id=self.file_id, part_number=part_number), - headers=self.headers, - ).json() - etag = self._upload_data_chunk_to_bucket(response['url'], file_data) + signed_url = signed_url_response.json()['url'] + etag = self._upload_chunk_to_storage(signed_url, chunk) self.etags.append({'e_tag': etag, 'part_number': part_number}) - return None - def complete_the_upload(self) -> bool: - response: requests.Response = self._make_request( - type='post', - url=self.endpoint_complete.format(file_id=self.file_id), - headers=self.headers, - json={"parts": self.etags} - ) - if response.status_code != 201: - return False - return True + def _upload_chunk_to_storage(self, url: str, chunk: bytes) -> str: + """Uploads a chunk of data to a given URL. - def upload_file(self): - if not self._is_file_exist(): - self._set_result_error_message("The file doesn't exist") - return self.result + Args + (url (str): The URL to which the chunk will be uploaded. + chunk (bytes): The chunk of data to be uploaded. - mime_file = { - "file_name": self.file_name, - "file_extension": self.file_extension, - "quota": self.quota, - } - try: - self.file_id = self.start_file_upload(mime_file) + Returns: + str: The ETag header value returned by the server, identifying the chunk. + """ + response = requests.put(url, data=chunk) + return response.headers['ETag'] - self.chop_and_upload_chunk() - self.complete_the_upload() - except Exception as e: - self._set_result_error_message(str(e)) - return self.result + def finalize_upload(self) -> None: + """Finalizes the file upload process, indicating all chunks have been uploaded.""" + self._make_request( + 'post', + self.endpoints['complete'].format(file_id=self.file_id), + json={"parts": self.etags} + ) - self.result = { - "url": URL + TRANSCRIPT_ENDPOINT.format(file_id=self.file_id), + def get_upload_result(self) -> Dict[str, str]: + """Constructs the final result of the file upload process. + + Returns: + Dict[str, str]: A dictionary containing details of the upload, including the file URL. + """ + return { + "url": self.endpoints['result'].format(file_id=self.file_id), "status": "uploaded", "estimated_processing_time": 0, "message": "The file has been uploaded." } - return self.result + def _make_request(self, method: str, url: str, **kwargs) -> requests.Response: + """Makes an HTTP request with the specified method, URL, and additional arguments. + + Args: + method (str): The HTTP method to use (e.g., 'get', 'post'). + url (str): The URL for the request. + **kwargs: Additional keyword arguments to pass to the requests function. + + Returns: + requests.Response: The response object from the HTTP request. + + Raises: + UploaderException: If the request fails after the maximum retry limit. + """ + for _ in range(self.REQUEST_RETRY_LIMIT): + response = getattr(requests, method)(url, headers=self.headers, **kwargs) + if self._is_response_successful(response): + return response + if method == 'post' and response.status_code >= 400: + raise UploaderException(f"Error during request to {url}", response.json()['detail']) + raise UploaderException("Maximum retry limit reached for request", None) + + @staticmethod + def _is_response_successful(response: requests.Response) -> bool: + """Checks if an HTTP response indicates a successful request. + + Args: + response (requests.Response): The response object to check. + + Returns: + bool: True if the response indicates success, False otherwise. + """ + return 200 <= response.status_code < 300 + +def upload_and_get_transcription(file: str, api_key: str, quota: int) -> Dict[str, str]: + """Uploads a file and returns its transcription. + + Args: + file (str): The path to the file to be uploaded. + api_key (str): The API key for authentication. + quota (int): The quota limit for the user. + + Returns: + Dict[str, str]: A dictionary containing the transcription or error message. + + Raises: + UploaderException: If there's an issue with the file upload. + """ + try: + uploader = Uploader(file, api_key, quota) + result = uploader.upload_file() + except FileNotFoundError as fnfe: + return {"status": "error", "message": str(fnfe)} + except UploaderException as ue: + return {"status": "error", "message": str(ue)} + except Exception as e: + return {"status": "error", "message": f"Unexpected error: {str(e)}"} -def upload_and_get_transcription(file, api_key, quota) -> dict: - result: dict = Uploader(file, api_key, quota).upload_file() update_myself() - return result + return result \ No newline at end of file From 40c7f88d3a25127439ce131fbbdd65f8dedba81f Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Thu, 21 Dec 2023 15:33:38 +0100 Subject: [PATCH 6/9] print estimated_processing_time returned by api --- src/mediacatch_s2t/uploader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index dca4cc8..c368172 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -147,11 +147,12 @@ def _upload_chunk_to_storage(self, url: str, chunk: bytes) -> str: def finalize_upload(self) -> None: """Finalizes the file upload process, indicating all chunks have been uploaded.""" - self._make_request( + response = self._make_request( 'post', self.endpoints['complete'].format(file_id=self.file_id), json={"parts": self.etags} ) + self.estimated_processing_time = response.json()['estimated_processing_time'] def get_upload_result(self) -> Dict[str, str]: """Constructs the final result of the file upload process. @@ -162,7 +163,7 @@ def get_upload_result(self) -> Dict[str, str]: return { "url": self.endpoints['result'].format(file_id=self.file_id), "status": "uploaded", - "estimated_processing_time": 0, + "estimated_processing_time": self.estimated_processing_time, "message": "The file has been uploaded." } From 91cd96095afe331e6ec4c509a11abc3fa8cd2979 Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Fri, 22 Dec 2023 13:23:52 +0100 Subject: [PATCH 7/9] Allow quota=None in python interface --- src/mediacatch_s2t/uploader.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index c368172..8432d46 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -1,7 +1,7 @@ import requests from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path -from typing import Dict, Optional, Generator +from typing import Optional, Generator from mediacatch_s2t import ( URL, @@ -39,7 +39,7 @@ class Uploader: CHUNK_SIZE = 100 * 1024 * 1024 # 100 MB REQUEST_RETRY_LIMIT = 3 - def __init__(self, file: str, api_key: str, quota: int, max_threads: int = 5) -> None: + def __init__(self, file: str, api_key: str, quota: str | None = None, max_threads: int = 5) -> None: self.file_path = Path(file) if not self.file_path.is_file(): raise FileNotFoundError(f"The file {file} does not exist") @@ -61,11 +61,11 @@ def __init__(self, file: str, api_key: str, quota: int, max_threads: int = 5) -> } self.max_threads = max_threads - def upload_file(self) -> Dict[str, str]: + def upload_file(self) -> dict[str, str]: """Initiates and manages the file upload process. Returns: - Dict[str, str]: A dictionary containing the result of the upload process. + dict[str, str]: A dictionary containing the result of the upload process. """ try: self.file_id = self.initiate_file_upload() @@ -154,11 +154,11 @@ def finalize_upload(self) -> None: ) self.estimated_processing_time = response.json()['estimated_processing_time'] - def get_upload_result(self) -> Dict[str, str]: + def get_upload_result(self) -> dict[str, str]: """Constructs the final result of the file upload process. Returns: - Dict[str, str]: A dictionary containing details of the upload, including the file URL. + dict[str, str]: A dictionary containing details of the upload, including the file URL. """ return { "url": self.endpoints['result'].format(file_id=self.file_id), @@ -201,16 +201,16 @@ def _is_response_successful(response: requests.Response) -> bool: """ return 200 <= response.status_code < 300 -def upload_and_get_transcription(file: str, api_key: str, quota: int) -> Dict[str, str]: +def upload_and_get_transcription(file: str, api_key: str, quota: str | None = None) -> dict[str, str]: """Uploads a file and returns its transcription. Args: file (str): The path to the file to be uploaded. api_key (str): The API key for authentication. - quota (int): The quota limit for the user. + quota (str | None): The quota to bill transcription hours from. Use None if user only has 1 quota. Returns: - Dict[str, str]: A dictionary containing the transcription or error message. + dict[str, str]: A dictionary containing the transcription or error message. Raises: UploaderException: If there's an issue with the file upload. From ae3cbba7d2be2bad635b6a0c86392c7392ae14e2 Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Fri, 22 Dec 2023 15:24:28 +0100 Subject: [PATCH 8/9] Fix tests for v2 --- tests/test_exception.py | 8 +- tests/test_multipart_upload.py | 259 --------------------------------- tests/test_uploader.py | 75 ---------- 3 files changed, 4 insertions(+), 338 deletions(-) delete mode 100644 tests/test_multipart_upload.py delete mode 100644 tests/test_uploader.py diff --git a/tests/test_exception.py b/tests/test_exception.py index de748b5..d8f835f 100644 --- a/tests/test_exception.py +++ b/tests/test_exception.py @@ -5,11 +5,11 @@ class TestUploaderException: def test_UploaderException_without_cause(self): - new_exception = UploaderException() - assert str(new_exception) == "Error from uploader module" + new_exception = UploaderException('Test message') + assert str(new_exception) == 'Test message' def test_UploderException_with_cause(self): - new_exception = UploaderException("Test Exception") + new_exception = UploaderException('Test message', 'Test Exception') assert str(new_exception) == ( - "Error from uploader module: Test Exception" + "Test message: Test Exception" ) diff --git a/tests/test_multipart_upload.py b/tests/test_multipart_upload.py deleted file mode 100644 index 3b36352..0000000 --- a/tests/test_multipart_upload.py +++ /dev/null @@ -1,259 +0,0 @@ -from unittest import mock - -import pytest -import responses - -from mediacatch_s2t import ( - URL, MULTIPART_UPLOAD_CREATE_ENDPOINT, MULTIPART_UPLOAD_URL_ENDPOINT, - MULTIPART_UPLOAD_COMPLETE_ENDPOINT, - UPDATE_STATUS_ENDPOINT -) -from mediacatch_s2t.uploader import ( - ChunkedFileUploader, Uploader, UploaderException) - - -class TestMultipartUpload: - create_multipart_url = f"{URL}{MULTIPART_UPLOAD_CREATE_ENDPOINT}" - get_signed_url = f"{URL}{MULTIPART_UPLOAD_URL_ENDPOINT}" - complete_upload_url = f"{URL}{MULTIPART_UPLOAD_COMPLETE_ENDPOINT}" - update_status_url = f"{URL}{UPDATE_STATUS_ENDPOINT}" - chunk_maxsize = 20480000 - filesize = (500 * chunk_maxsize) + 10000 - file_id = "644f6676997bc2477563246e" - upload_id = "2~iRldDSPjP1cJCXg-7NmR9Sd4xpX_Cii" - mime_file = { - "duration": 1000, - "filename": "file-test", - "file_ext": ".mp4", - "filesize": filesize, - "language": "da", - } - - @pytest.fixture(autouse=True) - def _mock_pathlib_path(self): - with mock.patch("pathlib.Path") as mock_Path: - def side_effect(): - return True - mock_Path.return_value.name = 'file-test' - mock_Path.return_value.suffix = '.mp4' - mock_Path.return_value.is_file.side_effect = side_effect - yield mock_Path - - @pytest.fixture(autouse=True) - def _mock_os_getsize(self): - with mock.patch("os.path.getsize") as mock_getsize: - mock_getsize.return_value = self.filesize - yield mock_getsize - - @pytest.fixture(autouse=True) - def _mock_builtins_open(self): - with mock.patch("builtins.open", mock.mock_open(read_data="data")) as mock_open: - yield mock_open - - @pytest.fixture(autouse=True) - def _mock_get_duration(self): - with mock.patch("mediacatch_s2t.uploader.ChunkedFileUploader.get_duration") as mock_duration: - mock_duration.return_value = 100000, {} - yield mock_duration - - @pytest.fixture(autouse=True) - def _mock_chop_and_upload_chunk(self): - with mock.patch("mediacatch_s2t.uploader.ChunkedFileUploader.chop_and_upload_chunk") as mocker: - mocker.return_value = None - yield mocker - - - @pytest.fixture() - def _mock_endpoints(self): - with responses.RequestsMock() as resp: - resp.add( - responses.POST, - self.create_multipart_url, - status=200, - json={ - "file_id": self.file_id, - "chunk_maxsize": self.chunk_maxsize, - "total_chunks": 500 + 1, - "upload_id": self.upload_id - } - ) - resp.add( - responses.POST, - url=self.complete_upload_url, - status=201 - ) - resp.add( - responses.POST, - url=self.update_status_url, - status=201 - ) - yield resp - - @responses.activate - def test_create_multipart_upload_return_success(self): - responses.add( - responses.POST, - self.create_multipart_url, - status=200, - json={ - "file_id": self.file_id, - "chunk_maxsize": self.chunk_maxsize, - "total_chunks": 500 + 1, - "upload_id": self.upload_id - } - ) - file = ChunkedFileUploader( - file='file-test.mp4', - api_key='test-key' - ) - result = file.create_multipart_upload(self.mime_file) - assert result == { - "file_id": self.file_id, - "chunk_maxsize": self.chunk_maxsize, - "total_chunks": 500 + 1, - "upload_id": self.upload_id - } - - @responses.activate - def test_get_signed_url_return_url(self): - responses.add( - responses.POST, - url=self.get_signed_url, - status=200, - json={ - "file_id": self.file_id, - "upload_id": self.upload_id, - "part_number": 1, - "url": "signed-upload-url" - } - ) - file = ChunkedFileUploader( - file='file-test.mp4', - api_key='test-key' - ) - result = file._get_signed_url(1) - assert result == "signed-upload-url" - - @responses.activate - @mock.patch("mediacatch_s2t.uploader.ChunkedFileUploader._get_signed_url") - def test_upload_part_return_etag(self, mocker): - mocker.return_value = "http://signed-upload-url" - - responses.add( - responses.PUT, - "http://signed-upload-url", - status=200, - headers={ - 'ETag': 'etag-from-s3' - } - ) - - file = ChunkedFileUploader("file-test.mp4", "test-key") - url = file._get_signed_url(1) - assert url == "http://signed-upload-url" - file_data = b'' - etag = file._upload_data_chunk_to_bucket(url, file_data) - assert etag == 'etag-from-s3' - - def test_upload_file(self, _mock_endpoints): - chunked_file = ChunkedFileUploader("file-test.mp4", "test-key") - - assert chunked_file._is_file_exist() is True - - file_duration, msg = chunked_file.get_duration() - assert file_duration == 100000 - - assert chunked_file.filename == "file-test" - assert chunked_file.file_ext == ".mp4" - assert chunked_file.filesize == 10240010000 - assert chunked_file.language == 'da' - - mime_file = { - "duration": file_duration, - "filename": chunked_file.filename, - "file_ext": chunked_file.file_ext, - "filesize": chunked_file.filesize, - "language": chunked_file.language, - } - meta = chunked_file.create_multipart_upload(mime_file) - chunked_file._set_metadata( - file_id=meta["file_id"], - chunk_maxsize=meta["chunk_maxsize"], - total_chunks=meta["total_chunks"], - upload_id=meta["upload_id"] - ) - assert chunked_file.file_id == self.file_id - assert chunked_file.chunk_maxsize == self.chunk_maxsize - assert chunked_file.total_chunks == 501 - assert chunked_file.upload_id == self.upload_id - - chunked_file.chop_and_upload_chunk() - - assert chunked_file.complete_the_upload() is True - - link = 'https://s2t.mediacatch.io/result?id=644f6676997bc2477563246e&api_key=test-key' - assert chunked_file._get_transcript_link() == link - - result = chunked_file.upload_file() - assert result == { - "url": link, - "status": "uploaded", - "estimated_processing_time": 10, - "message": "The file has been uploaded." - } - - -class TestUploaderMethod: - @pytest.fixture() - def _mock_is_file_exist_true(self): - with mock.patch( - "mediacatch_s2t.uploader.Uploader._is_file_exist") as mocker: - mocker.return_value = True - yield mocker - - @mock.patch("os.path.getsize", return_value=10240010000) - def test_is_multipart_upload_return_true(self, mocker, _mock_is_file_exist_true): - file = Uploader("file-test.mp4", "test-key") - assert file.is_multipart_upload() is True - - @mock.patch("os.path.getsize", return_value=10) - def test_is_multipart_upload_return_false(self, mocker, _mock_is_file_exist_true): - file = Uploader("file-test.mp4", "test-key") - assert file.is_multipart_upload() is False - - def test_is_multipart_upload_file_not_exists(self): - file = Uploader("file-test.mp4", "test-key") - assert file.is_multipart_upload() is False - - - def test_is_response_error_return_true(self): - response = mock.Mock() - response.status_code = 401 - response.json.return_value = {"message": "an error 401 test message"} - - file = Uploader("file-test.mp4", "test-key") - result = file._is_response_error(response) - assert result == (True, "an error 401 test message") - - response.status_code = 500 - response.json.return_value = {"message": "an error 500 test message"} - result = file._is_response_error(response) - assert result == (True, "an error 500 test message") - - @responses.activate - def test_make_post_request_raise_exception(self): - responses.add( - responses.POST, - url="http://test-500", - json={"message": "test error 500"}, - status=500 - ) - - file = Uploader("file-test.mp4", "test-key") - - with pytest.raises(UploaderException) as exc_info: - file._make_post_request(url="http://test-500") - assert str(exc_info.value) == ( - "Error from uploader module: Error during post request " - "http://test-500; test error 500" - ) diff --git a/tests/test_uploader.py b/tests/test_uploader.py deleted file mode 100644 index 27111ca..0000000 --- a/tests/test_uploader.py +++ /dev/null @@ -1,75 +0,0 @@ -from unittest import mock - -import responses -from mediacatch_s2t import URL, SINGLE_UPLOAD_ENDPOINT, TRANSCRIPT_ENDPOINT, UPDATE_STATUS_ENDPOINT -from mediacatch_s2t.uploader import upload_and_get_transcription, Uploader - - -@mock.patch("pathlib.Path.is_file", return_value=True) -def test_is_file_exist_mocked_return_true(mock_is_file): - assert Uploader('fake file', 'fake key')._is_file_exist() is True - - -@mock.patch("subprocess.run") -def test_get_duration_mocked_return_value(mock_subprocess): - mock_subprocess.return_value.returncode = 0 - mock_subprocess.return_value.stdout = '{"streams": [{"codec_type": "audio", "duration": "1"}]}' - mock_subprocess.return_value.stderr = None - assert Uploader('fake file', 'fake key').get_duration() == (1000, {'codec_type': 'audio', 'duration': '1'}) - - -@mock.patch("subprocess.run") -def test_get_duration_audio_not_available_mocked_return_value(mock_subprocess): - mock_subprocess.return_value.returncode = 0 - mock_subprocess.return_value.stdout = '{"streams": [{"codec_type": "audio"}], "format": {"duration": "1"}}' - mock_subprocess.return_value.stderr = None - assert Uploader('fake file', 'fake key').get_duration() == (1000, {"duration": "1"}) - - -def test_estimated_result_time(): - assert Uploader('fake file', 'fake key').estimated_result_time(10000) == 1 - -@responses.activate -@mock.patch("builtins.open", new_callable=mock.mock_open, - read_data="bytes of data") -@mock.patch("pathlib.Path") -@mock.patch("os.path.getsize", return_value=100) -@mock.patch("subprocess.run") -def test_upload_succeed(mock_subprocess, mock_getsize, mock_Path, mock_open): - URL_EXAMPLE = 'http://url-for-upload.example.com' - - def side_effect(): - return True - mock_Path.return_value.name = 'name' - mock_Path.return_value.suffix = '.avi' - mock_Path.return_value.is_file.side_effect = side_effect - - mock_subprocess.return_value.returncode = 0 - mock_subprocess.return_value.stdout = '{"streams": [{"codec_type": "audio", "duration": 100}]}' - mock_subprocess.return_value.stderr = None - - responses.add( - responses.POST, f'{URL}{SINGLE_UPLOAD_ENDPOINT}', status=200, - json={ - 'url': URL_EXAMPLE, - 'fields': {'key': 'all fields we need'}, - 'id': 'some-id' - } - ) - responses.add( - responses.POST, f'{URL}{UPDATE_STATUS_ENDPOINT}', status=204 - ) - responses.add( - responses.POST, f'{URL}{TRANSCRIPT_ENDPOINT}', status=200 - ) - responses.add( - responses.POST, URL_EXAMPLE, status=200 - ) - expected_output = { - 'estimated_processing_time': 10, - 'message': 'The file has been uploaded.', - 'status': 'uploaded', - 'url': 'https://s2t.mediacatch.io/result?id=some-id&api_key=fake-key' - } - assert Uploader('fake-file', 'fake-key', 'fake-language').upload_file() == expected_output - From 758d1fea1be40d6c399510a3fbc06d10b51eb8c6 Mon Sep 17 00:00:00 2001 From: Frederik Haarslev Date: Fri, 22 Dec 2023 15:29:52 +0100 Subject: [PATCH 9/9] Fix type hints for python 3.7-3.9 --- src/mediacatch_s2t/uploader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py index 8432d46..7dff8dd 100644 --- a/src/mediacatch_s2t/uploader.py +++ b/src/mediacatch_s2t/uploader.py @@ -39,7 +39,7 @@ class Uploader: CHUNK_SIZE = 100 * 1024 * 1024 # 100 MB REQUEST_RETRY_LIMIT = 3 - def __init__(self, file: str, api_key: str, quota: str | None = None, max_threads: int = 5) -> None: + def __init__(self, file: str, api_key: str, quota: Optional[str] = None, max_threads: int = 5) -> None: self.file_path = Path(file) if not self.file_path.is_file(): raise FileNotFoundError(f"The file {file} does not exist") @@ -201,7 +201,7 @@ def _is_response_successful(response: requests.Response) -> bool: """ return 200 <= response.status_code < 300 -def upload_and_get_transcription(file: str, api_key: str, quota: str | None = None) -> dict[str, str]: +def upload_and_get_transcription(file: str, api_key: str, quota: Optional[str] = None) -> dict[str, str]: """Uploads a file and returns its transcription. Args: