diff --git a/src/app/engine/rasterizers.py b/src/app/engine/rasterizers.py index f9fc2bb..f056a81 100644 --- a/src/app/engine/rasterizers.py +++ b/src/app/engine/rasterizers.py @@ -33,7 +33,7 @@ def rasterize_pdf(self, subfolder_path): pdf_source = os.path.join(subfolder_path, "source.pdf") images = self.__rasterize(pdf_source, subfolder_path, dpi=self._dpi) images = self.__validate_rasterized_images(images, pdf_source, subfolder_path) - return images + return [i.filename for i in images] def __rasterize( self, pdf_source, subfolder_path, start_page=None, last_page=None, dpi=None @@ -65,6 +65,7 @@ def __validate_rasterized_images(self, images, pdf_source, subfolder_path): if res == ResizeResult.SINGLE_PIXEL: single_pixel_pages.append(idx + 1) idx += 1 + im.close() if single_pixel_pages: return self.__rescale_single_page_default_dpi( @@ -89,8 +90,8 @@ def __ensure_image_size(self, idx, im: Image): logger.info( f"resizing image index {idx} from {w},{h} to {scale_w},{scale_h}" ) - resized = im.resize((scale_w, scale_h), resample=Image.LANCZOS) - resized.save(filename) + with im.resize((scale_w, scale_h), resample=Image.LANCZOS) as resized: + resized.save(filename) return ResizeResult.RESIZED return ResizeResult.NOOP diff --git a/src/app/engine/s3.py b/src/app/engine/s3.py index eadcd92..35d1249 100644 --- a/src/app/engine/s3.py +++ b/src/app/engine/s3.py @@ -26,7 +26,9 @@ def __build_bucket_base_url(self): else: return f"https://s3.amazonaws.com/{self._bucket_name}" - def put_images(self, images, submission_id, composite_id, customer_id, space_id): + def put_images( + self, image_paths, submission_id, composite_id, customer_id, space_id + ): s3_uris = [] key_prefix = self.__get_key_prefix( @@ -36,14 +38,14 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id) with tqdm.tqdm( desc=f"[{submission_id}] Upload images to S3", unit=" image", - total=len(images), + total=len(image_paths), ) as progress_bar: with ThreadPoolExecutor(max_workers=self._upload_threads) as executor: # It's critical that the list of S3 URI's returned by this method is in the # same order as the list of images provided to it. '.map(...)' gives us that, # whilst '.submit(...)' does not. for s3_uri in executor.map( - self.__put_image, repeat(key_prefix), images + self.__put_image, repeat(key_prefix), image_paths ): s3_uris.append(s3_uri) progress_bar.update(1) @@ -52,8 +54,8 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id) def __get_key_prefix(self, submission_id, composite_id, customer, space): return f"{self._object_key_prefix}/{customer}/{space}/{composite_id or submission_id}" - def __put_image(self, key_prefix, image): - object_key = f"{key_prefix}/{os.path.basename(image.filename)}" - with open(image.filename, "rb") as file: + def __put_image(self, key_prefix, image_path): + object_key = f"{key_prefix}/{os.path.basename(image_path)}" + with open(image_path, "rb") as file: self._client.put_object(Bucket=self._bucket_name, Key=object_key, Body=file) return f"{self._bucket_base_url}/{object_key}" diff --git a/src/app/engine/tasks.py b/src/app/engine/tasks.py index bca2997..3ae7901 100644 --- a/src/app/engine/tasks.py +++ b/src/app/engine/tasks.py @@ -28,8 +28,8 @@ def process_member(args): folder_path = None try: folder_path = __fetch_origin(member, member.json_data["origin"]) - images = __rasterize_composite(member, folder_path) - s3_urls = __push_images_to_dlcs(member, images) + image_paths = __rasterize_composite(member, folder_path) + s3_urls = __push_images_to_dlcs(member, image_paths) dlcs_requests = __build_dlcs_requests(member, s3_urls) dlcs_responses = __initiate_dlcs_ingest(member, dlcs_requests, args["auth"]) return __build_result(member, dlcs_responses) @@ -49,12 +49,12 @@ def __rasterize_composite(member, pdf_path): return pdf_rasterizer.rasterize_pdf(pdf_path) -def __push_images_to_dlcs(member, images): - __update_status(member, "PUSHING_TO_DLCS", image_count=len(images)) +def __push_images_to_dlcs(member, image_paths): + __update_status(member, "PUSHING_TO_DLCS", image_count=len(image_paths)) composite_id = member.json_data.get("compositeId") customer = member.collection.customer space = member.json_data["space"] - return s3_client.put_images(images, member.id, composite_id, customer, space) + return s3_client.put_images(image_paths, member.id, composite_id, customer, space) def __build_dlcs_requests(member, dlcs_uris):