Added flag and code that saves ICP output in UI format

icatcherplus · Sep 20, 2023 · 4e107d9 · 4e107d9
1 parent 14d151a
commit 4e107d9
Show file tree

Hide file tree

Showing 3 changed files with 229 additions and 6 deletions.
diff --git a/src/icatcher/cli.py b/src/icatcher/cli.py
@@ -14,6 +14,7 @@
     video,
     models,
     parsers,
+    ui_packaging,
 )
 from icatcher.face_detector import (
     extract_bboxes,
@@ -257,15 +258,24 @@ def create_output_streams(video_path, framerate, resolution, opt):
     :param framerate: video framerate
     :param resolution: video resolution
     :param opt: options
-    :return: video_output_file, prediction_output_file, skip = prediction file already exists
+    :return: video_output_file, prediction_output_file, ui_output_components, skip = prediction file already exists
     """
     video_output_file = None
     prediction_output_file = None
+    ui_output_components = None
     skip = False
+
+    fourcc = cv2.VideoWriter_fourcc(
+        *"MP4V"
+    )  # may need to be adjusted per available codecs & OS
+    if opt.ui_packaging_path:
+        video_creator = lambda path: cv2.VideoWriter(str(path), fourcc, framerate, resolution, True)
+        ui_output_components = ui_packaging.prepare_ui_output_components(
+            opt.ui_packaging_path,
+            video_path,
+            video_creator,
+        )
     if opt.output_video_path:
-        fourcc = cv2.VideoWriter_fourcc(
-            *"MP4V"
-        )  # may need to be adjusted per available codecs & OS
         my_video_path = Path(opt.output_video_path, video_path.stem + "_output.mp4")
         video_output_file = cv2.VideoWriter(
             str(my_video_path), fourcc, framerate, resolution, True
@@ -285,7 +295,7 @@ def create_output_streams(video_path, framerate, resolution, opt):
                         "Annotation output file already exists. Use --overwrite flag to overwrite."
                     )
 
-    return video_output_file, prediction_output_file, skip
+    return video_output_file, prediction_output_file, ui_output_components, skip
 
 
 def predict_from_video(opt):
@@ -343,7 +353,7 @@ def predict_from_video(opt):
             w_start_at,
             w_end_at,
         ) = video.process_video(video_path, opt)
-        video_output_file, prediction_output_file, skip = create_output_streams(
+        video_output_file, prediction_output_file, ui_output_components, skip = create_output_streams(
             video_path, framerate, resolution, opt
         )
         if skip:
@@ -549,6 +559,26 @@ def predict_from_video(opt):
                         pic_in_pic=opt.pic_in_pic,
                     )
                     video_output_file.write(cur_frame)
+                if opt.ui_packaging_path:
+                    if is_from_tracker and opt.track_face:
+                        rect_color = (0, 0, 255)
+                    else:
+                        rect_color = (0, 255, 0)
+                    output_for_ui = ui_packaging.prepare_frame_for_ui(
+                        cur_frame,
+                        cur_bbox,
+                        rect_color=rect_color,
+                        conf=confidences[cursor],
+                        class_text=class_text,
+                        frame_number=frame_count,
+                        pic_in_pic=opt.pic_in_pic,
+                    )
+                    ui_packaging.save_ui_output(
+                        frame_number=frame_count,
+                        cursor=cursor,
+                        ui_output_components=ui_output_components,
+                        output_for_ui=output_for_ui,
+                    )
                 if opt.show_output:
                     if is_from_tracker and opt.track_face:
                         rect_color = (0, 0, 255)
@@ -589,6 +619,17 @@ def predict_from_video(opt):
                 )
             ret_val, frame = cap.read()
             frame_count += 1
+
+        if opt.ui_packaging_path:
+            # Write UI packaging metadata into a JSON file
+            video_fps = video.get_video_stream_meta_data(video_path)["r_frame_rate"].split("/")
+            video_fps = float(video_fps[0]) / float(video_fps[1])
+            ui_packaging.save_ui_metadata(
+                fps=video_fps,
+                frame_count=frame_count,
+                sliding_window_size=opt.sliding_window_size,
+                metadata_file_path=ui_output_components["metadata_path"],
+            )
         # finished processing a video file, cleanup
         cleanup(
             video_output_file,

diff --git a/src/icatcher/options.py b/src/icatcher/options.py
@@ -107,6 +107,10 @@ def parse_arguments(my_string=None):
         "--output_video_path",
         help="If present, annotated video will be saved to this folder.",
     )
+    parser.add_argument(
+        "--ui_packaging_path",
+        help="If present, packages the output data into the UI format.",
+    )
     parser.add_argument(
         "--pic_in_pic",
         action="store_true",

diff --git a/src/icatcher/ui_packaging.py b/src/icatcher/ui_packaging.py
@@ -0,0 +1,178 @@
+import json
+import cv2
+import numpy as np
+from pathlib import Path
+from icatcher import draw
+
+from typing import Callable, Dict, Union, Tuple
+
+
+def prepare_ui_output_components(
+    ui_packaging_path: str, video_path: str, video_creator: Callable
+) -> Dict[str, Union[cv2.VideoWriter, str]]:
+    """
+    Given a path to a directory, prepares a dictionary of paths and videos necessary for the UI.
+
+    :param ui_packaging_path: path to folder in which the output will be saved
+    :param video_path: the original video path
+    :param video_creator: a function to create video files given a path
+    :return: a dictionary mapping each UI component to its path or video writer
+    """
+
+    original_video_path = Path(ui_packaging_path, video_path.stem, "video.mp4")
+    decorated_video_path = Path(
+        ui_packaging_path, video_path.stem, "decorated_video.mp4"
+    )
+    bbox_only_video_path = Path(
+        ui_packaging_path, video_path.stem, "decorated_video_bbox_only.mp4"
+    )
+
+    frames_path = Path(ui_packaging_path, video_path.stem, "frames")
+    decorated_frames_path = Path(ui_packaging_path, video_path.stem, "decorated_frames")
+    bbox_only_frames_path = Path(
+        ui_packaging_path, video_path.stem, "decorated_frames_bbox_only"
+    )
+
+    frames_path.mkdir(parents=True, exist_ok=True)
+    decorated_frames_path.mkdir(parents=True, exist_ok=True)
+    bbox_only_frames_path.mkdir(parents=True, exist_ok=True)
+
+    labels_path = Path(ui_packaging_path, video_path.stem, "labels.txt")
+    metadata_path = Path(ui_packaging_path, video_path.stem, "metadata.json")
+
+    ui_output_components = {
+        "original_video": video_creator(original_video_path),
+        "decorated_video": video_creator(decorated_video_path),
+        "bbox_only_video": video_creator(bbox_only_video_path),
+        "frames_path": frames_path,
+        "decorated_frames_path": decorated_frames_path,
+        "bbox_only_frames_path": bbox_only_frames_path,
+        "labels_path": labels_path,
+        "metadata_path": metadata_path,
+    }
+    return ui_output_components
+
+
+def prepare_frame_for_ui(
+    cur_frame: np.ndarray,
+    cur_bbox: np.ndarray,
+    rect_color: Tuple[int, int, int],
+    conf: np.ndarray,
+    class_text: str,
+    frame_number: int,
+    pic_in_pic: bool,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, str]:
+    """
+    Given a frame and decoration parameters, generates variants of the frame without decoration, with bounding boxes only, and
+     with full decoration, and generates the annotation text to be added to the labels file.
+
+    :param cur_frame: image of the frame to prepare
+    :param cur_bbox: bounding box of the face
+    :param rect_color: color of the rectangle representing the bounding box
+    :param conf: model's prediction confidence
+    :param class_text: the predicted class by the model
+    :param frame_number: the index of the frame in `cur_frame` in the video
+    :param pic_in_pic: whether to show a mini picture with detections
+    :return: three images: original image, fully-decorated image, and image with bounding boxes only; and the frame annotation
+    """
+
+    decorated_frame = draw.prepare_frame(
+        cur_frame.copy(),
+        cur_bbox,
+        show_arrow=True,
+        rect_color=rect_color,
+        conf=conf,
+        class_text=class_text,
+        frame_number=frame_number,
+        pic_in_pic=pic_in_pic,
+    )
+
+    bbox_only_frame = draw.prepare_frame(
+        cur_frame.copy(),
+        cur_bbox,
+        show_arrow=True,
+        rect_color=rect_color,
+    )
+
+    label_txt = f"{class_text}, {float(conf):.02}"
+    return (
+        cur_frame,
+        decorated_frame,
+        bbox_only_frame,
+        label_txt,
+    )
+
+
+def save_ui_output(
+    frame_number: int, cursor: int, ui_output_components: Dict, output_for_ui: Tuple
+):
+    """
+    Given the UI components and inference output, saves the output for the current frame in the UI output directory
+
+    :param frame_number: number of the current frame to be saved
+    :param cursor: the frame index to be saved relative to current frame
+    :param ui_output_components: dictionary containing UI components and their paths/video writers
+    :param output_for_ui: a tuple containing the original frame, decorated frame, frame with bounding boxes only, and
+                          annotation text
+    """
+
+    frame_idx = frame_number + cursor + 1
+    original_frame, decorated_frame, bbox_only_frame, label_text = output_for_ui
+
+    # Save raw frame
+    ui_output_components["original_video"].write(original_frame)
+    original_frame_path = Path(
+        ui_output_components["frames_path"], f"frame_{frame_idx:05d}.jpg"
+    )
+    cv2.imwrite(str(original_frame_path), original_frame)
+
+    # Save decorated frame
+    ui_output_components["decorated_video"].write(decorated_frame)
+    decorated_frame_path = Path(
+        ui_output_components["decorated_frames_path"], f"frame_{frame_idx:05d}.jpg"
+    )
+    cv2.imwrite(str(decorated_frame_path), decorated_frame)
+
+    # Save frame with bounding boxes only
+    ui_output_components["bbox_only_video"].write(bbox_only_frame)
+    bbox_only_frame_path = Path(
+        ui_output_components["bbox_only_frames_path"], f"frame_{frame_idx:05d}.jpg"
+    )
+    cv2.imwrite(str(bbox_only_frame_path), bbox_only_frame)
+
+    # Write new annotation to labels file
+    with open(ui_output_components["labels_path"], "a", newline="") as f:
+        f.write(f"{frame_idx}, {label_text}\n")
+
+
+def save_ui_metadata(
+    fps: float, frame_count: int, sliding_window_size: int, metadata_file_path: Path
+) -> Dict[str, Union[float, int, str]]:
+    """
+    Given metadata information on the video and annotation process, compiles a dict with
+     metadata and saves it as a json file.
+
+    :param fps: frames per second in the saved videos
+    :param frame_count: number of frames in the saved videos
+    :param sliding_window_size: number of frames in rolling window of each datapoint
+    :param metadata_file_path: path of the file to save the metadata to using JSON
+    :return: a dictionary containing video and inference metadata for UI visualization
+    """
+
+    fps = round(fps, 2)
+    metadata = {
+        "fps": fps,
+        "numFrames": frame_count - sliding_window_size + 1,
+        "frameOffset": sliding_window_size - 1,
+        "metadataExample": {
+            "baseFramePath": str(Path(metadata_file_path.parent, "frames/")),
+            "baseFileName": "frame",
+            "numDigitsFrame": 5,
+            "frameExt": ".jpg",
+        },
+    }
+
+    with open(metadata_file_path, "w") as f:
+        json.dump(metadata, f, indent=2)
+
+    return metadata