fix conflict

gist-ailab · Nov 8, 2024 · 7358971 · 7358971
2 parents fe370b3 + 5ec83f4
commit 7358971
Show file tree

Hide file tree

Showing 44 changed files with 5,150 additions and 16 deletions.
diff --git a/__pycache__/trainer.cpython-37.pyc b/__pycache__/trainer.cpython-37.pyc
diff --git a/bert/__pycache__/activations.cpython-37.pyc b/bert/__pycache__/activations.cpython-37.pyc
diff --git a/bert/__pycache__/configuration_bert.cpython-37.pyc b/bert/__pycache__/configuration_bert.cpython-37.pyc
diff --git a/bert/__pycache__/configuration_utils.cpython-37.pyc b/bert/__pycache__/configuration_utils.cpython-37.pyc
diff --git a/bert/__pycache__/file_utils.cpython-37.pyc b/bert/__pycache__/file_utils.cpython-37.pyc
diff --git a/bert/__pycache__/generation_utils.cpython-37.pyc b/bert/__pycache__/generation_utils.cpython-37.pyc
diff --git a/bert/__pycache__/modeling_bert.cpython-37.pyc b/bert/__pycache__/modeling_bert.cpython-37.pyc
diff --git a/bert/__pycache__/modeling_utils.cpython-37.pyc b/bert/__pycache__/modeling_utils.cpython-37.pyc
diff --git a/bert/__pycache__/tokenization_bert.cpython-37.pyc b/bert/__pycache__/tokenization_bert.cpython-37.pyc
diff --git a/bert/__pycache__/tokenization_utils.cpython-37.pyc b/bert/__pycache__/tokenization_utils.cpython-37.pyc
diff --git a/bert/__pycache__/tokenization_utils_base.cpython-37.pyc b/bert/__pycache__/tokenization_utils_base.cpython-37.pyc
diff --git a/criterions/__pycache__/__init__.cpython-37.pyc b/criterions/__pycache__/__init__.cpython-37.pyc
diff --git a/criterions/__pycache__/label_smoothed_cross_entropy.cpython-37.pyc b/criterions/__pycache__/label_smoothed_cross_entropy.cpython-37.pyc
diff --git a/data/__pycache__/__init__.cpython-37.pyc b/data/__pycache__/__init__.cpython-37.pyc
diff --git a/data/__pycache__/base_dataset.cpython-37.pyc b/data/__pycache__/base_dataset.cpython-37.pyc
diff --git a/data/__pycache__/data_utils.cpython-37.pyc b/data/__pycache__/data_utils.cpython-37.pyc
diff --git a/data/__pycache__/file_dataset.cpython-37.pyc b/data/__pycache__/file_dataset.cpython-37.pyc
diff --git a/data/__pycache__/poly_utils.cpython-37.pyc b/data/__pycache__/poly_utils.cpython-37.pyc
diff --git a/data/__pycache__/refcoco_dataset.cpython-37.pyc b/data/__pycache__/refcoco_dataset.cpython-37.pyc
diff --git a/data/__pycache__/refcoco_pretrain_dataset.cpython-37.pyc b/data/__pycache__/refcoco_pretrain_dataset.cpython-37.pyc
diff --git a/data/create_aihub_data.py b/data/create_aihub_data.py
@@ -114,7 +114,11 @@
             img_base64 = image_to_base64(img, format='jpeg')
 
             # load mask
-            ref = refer.loadRefs(this_ref_id)
+            try:
+                ref = refer.loadRefs(this_ref_id)
+            except TypeError:
+                print('None mask error')
+                continue
             ref_mask = np.array(refer.getMask(ref[0])['mask'])
             annot = np.zeros(ref_mask.shape)
             annot[ref_mask == 1] = 1  # 255

diff --git a/data/create_pretraining_aihub_data.py b/data/create_pretraining_aihub_data.py
@@ -3,19 +3,43 @@
 from tqdm import tqdm
 import random
 import pickle
+import pandas as pd
+import glob
 
 
-# img_path = 'refer/data/aihub_refcoco_format/indoor_80/images'
-img_path = 'refer/data/aihub_refcoco_format/manufact_80/images'
+img_path = 'refer/data/aihub_refcoco_format/indoor_80/images'
+# img_path = 'refer/data/aihub_refcoco_format/manufact_80/images'
 
 # load annotation files
 # f = open("datasets/annotations/instances.json")
 # f = open("refer/data/aihub_refcoco_format/indoor_80/instances.json")
 f = open("refer/data/aihub_refcoco_format/manufact_80/instances_2.json")
+f = open("refer/data/aihub_refcoco_format/indoor_80/instances_2.json")
+# f = open("refer/data/aihub_refcoco_format/manufact_80/instances.json")
 print("Loading annotation file")
 data = json.load(f)
 f.close()
 
+# Define the directory containing your CSV files
+csv_dir = 'data/aihub_csv_error_csv/indoor'  # Replace with the actual directory path
+# csv_dir = 'data/aihub_csv_error_csv/manufact'  # Replace with the actual directory path
+csv_files = glob.glob(f'{csv_dir}/*.csv')
+
+# Initialize an empty dictionary to store bounding box values from all CSV files
+bbox_dict = {}
+
+# Load and combine data from all CSV files
+for csv_file in csv_files:
+    bbox_data = pd.read_csv(csv_file)
+
+    # Determine prefix based on the file name
+    prefix = "real_" if "real_" in csv_file else "syn_"
+
+    # Convert filenames to the appropriate format and store in bbox_dict
+    bbox_data['파일명'] = bbox_data['파일명'].apply(lambda x: f'{prefix}{x}')
+    # Update bbox_dict with bbox data from this file
+    bbox_dict.update(dict(zip(bbox_data['파일명'], bbox_data['bbox'])))  # Replace 'bbox_column_name' with actual column name
+
 # load the validation and test image list of refcoco, refcoco+, and refcocog
 # val_test_files = pickle.load(open("data/val_test_files.p", "rb"))
 
@@ -36,7 +60,7 @@
 print(len(data['annotations']))
 
 # ref_file = 'refer/data/aihub_refcoco_format/indoor_80/refs.p'
-ref_file = 'refer/data/aihub_refcoco_format/manufact_80_2/refs.p'
+ref_file = 'refer/data/aihub_refcoco_format/manufact_80/refs.p'
 ref_ann = pickle.load(open(ref_file, 'rb'))
 print(ref_ann[10])
 print(ref_ann[1])
@@ -100,8 +124,39 @@
         print(expressions)
 
 
-    x, y, w, h = bbox
-    box_string = f'{x},{y},{x + w},{y + h}'
+    try:
+        fn = img_dict_i['file_name']
+        img_id = fn.split(".")[0].split("_")[-1]
+
+        # Determine the appropriate prefix for file_name_key
+        prefix = fn.split(".")[0].split("_")[0] + "_"
+        file_name_key = f"{prefix}{img_id}"
+        # load box
+        if file_name_key in bbox_dict:
+            print('bbox dict')
+            # Update bbox value based on CSV data
+            x1, y1, x2, y2 = map(int, bbox_dict[file_name_key].split(','))
+            box_string = f'{x1},{y1},{x2},{y2}'
+        else:
+            # prefix = img_dict_i['file_name'].split('_')[0]
+            # print(prefix)
+            # box = refer.getRefBox(this_ref_id)  # x,y,w,h
+            # Fallback to the default logic if not in combined CSV data
+            if prefix == "real_":
+                x, y, w, h = bbox
+                box_string = f'{x},{y},{x + w},{y + h}'
+            elif prefix == "syn_":
+                x1, y1, x2, y2 = bbox
+                box_string = f'{x1},{y1},{x2},{y2}'
+            else:
+                print("Image must be either real or syn")
+                exit()
+    except TypeError:
+        # print(bbox)
+        print(ann_i)
+        continue
+
+
 
     img_name = img_dict_i['file_name']
     filepath = os.path.join(img_path, img_name)
@@ -149,8 +204,12 @@
     img_dict_i = next((d for d in data['images'] if d["id"] == image_id), None)
     height, width = img_dict_i['height'], img_dict_i['width']
 
-    x, y, w, h = bbox
-    box_string = f'{x},{y},{x + w},{y + h}'
+    try:
+        x, y, w, h = bbox
+        box_string = f'{x},{y},{x + w},{y + h}'
+    except TypeError:
+        print(bbox)
+        continue
 
     img_name = img_dict_i['file_name']
     filepath = os.path.join(img_path, img_name)

diff --git a/data/visualize_aihub_data.py b/data/visualize_aihub_data.py
@@ -0,0 +1,189 @@
+import base64
+import matplotlib.pyplot as plt
+from PIL import Image, ImageDraw, ImageFont
+import io
+import numpy as np
+import os
+import textwrap
+
+def decode_base64_image(base64_str):
+    """Turn a base64 string into an RGB PIL image.
+
+    Args:
+        base64_str: Base64-encoded bytes of an image file.
+
+    Returns:
+        The decoded image converted to RGB mode, or None when decoding or
+        opening the image raises (the error is printed, not re-raised).
+    """
+    try:
+        raw_bytes = base64.b64decode(base64_str)
+        buffer = io.BytesIO(raw_bytes)
+        return Image.open(buffer).convert('RGB')
+    except Exception as e:
+        # Best-effort: the caller checks for None and skips the entry.
+        print(f"Error decoding image: {e}")
+        return None
+
+def generate_mask_from_pts_string(pts_string, image_size):
+    """Rasterize the polygons described by a points string into a mask.
+
+    The string holds polygons separated by ';'. Each polygon is a
+    comma-separated list of numbers read as x0,y0,x1,y1,...
+
+    Args:
+        pts_string: Polygon description in the format above.
+        image_size: Size handed to ``Image.new`` for the mask canvas.
+
+    Returns:
+        An 'L' mode image whose background is 0 and whose polygons are
+        drawn with outline and fill value 1, or None when parsing or
+        drawing raises (the error is printed, not re-raised).
+    """
+    try:
+        # Parse every polygon first so nothing is drawn on bad input.
+        polygons = []
+        for chunk in pts_string.strip().split(';'):
+            if chunk:
+                values = [float(token) for token in chunk.strip().split(',')]
+                # Consecutive values form one (x, y) vertex; an odd count
+                # raises IndexError and is reported by the handler below.
+                vertices = [(values[k], values[k + 1]) for k in range(0, len(values), 2)]
+                polygons.append(vertices)
+
+        canvas = Image.new('L', image_size, 0)
+        pen = ImageDraw.Draw(canvas)
+        for vertices in polygons:
+            pen.polygon(vertices, outline=1, fill=1)
+        return canvas
+    except Exception as e:
+        print(f"Error generating mask from pts_string: {e}")
+        return None
+
+def overlay_mask_on_image(image, mask, alpha=0.5):
+    """Tint the masked region of an image red.
+
+    Args:
+        image: PIL image to tint; expected to be RGB, since the tint
+            colour has three channels.
+        mask: PIL image whose non-zero pixels select the region. It is
+            resized with nearest-neighbour resampling when its size
+            differs from ``image``.
+        alpha: Weight of the red tint inside the region; the original
+            pixel keeps weight ``1 - alpha``.
+
+    Returns:
+        A new PIL image built from the blended uint8 array.
+    """
+    if mask.size != image.size:
+        mask = mask.resize(image.size, resample=Image.NEAREST)
+
+    # Boolean selector: any non-zero mask value counts as "inside".
+    selected = np.array(mask) > 0
+
+    # Blend in float32, then cast back to uint8 for PIL.
+    pixels = np.array(image).astype(np.float32)
+    red = np.array([255, 0, 0], dtype=np.float32)
+    pixels[selected] = (1 - alpha) * pixels[selected] + alpha * red
+
+    return Image.fromarray(pixels.astype(np.uint8))
+
+def add_referring_text(image, text):
+    """Draw the referring expression in the top-left corner of the image.
+
+    The text is wrapped at 40 characters and rendered in white with a
+    one-pixel black outline using the NanumGothic font.
+
+    Args:
+        image: PIL image to draw on; it is modified in place.
+        text: Referring expression to render.
+
+    Returns:
+        The same image object. When the font file cannot be loaded a
+        message is printed and the image is returned without any text.
+    """
+    draw = ImageDraw.Draw(image)
+    font_size = max(15, image.size[0] // 50)
+    try:
+        # Path of the Korean font used for the referring text
+        font_path = "/usr/share/fonts/truetype/nanum/NanumGothic.ttf"
+        font = ImageFont.truetype(font_path, font_size)
+    except IOError:
+        print("Korean font not found. Text will not be added.")
+        return image
+
+    text_color = (255, 255, 255)  # White color
+    outline_color = (0, 0, 0)     # Black outline
+
+    # The eight neighbouring offsets used to fake a one-pixel outline.
+    outline_offsets = [
+        (dx, dy)
+        for dx in (-1, 0, 1)
+        for dy in (-1, 0, 1)
+        if dx != 0 or dy != 0
+    ]
+
+    # Wrap the text
+    lines = textwrap.wrap(text, width=40)  # Adjust width as needed
+
+    x_text = 10
+    y_text = 10
+    for line in lines:
+        # FreeTypeFont.getsize() was removed in Pillow 10, so prefer
+        # getbbox(). The bottom edge of the box is used as the line
+        # advance; it should equal the height getsize() used to report
+        # - TODO confirm on the installed Pillow version.
+        if hasattr(font, "getbbox"):
+            line_height = font.getbbox(line)[3]
+        else:
+            # Very old Pillow releases without getbbox().
+            _, line_height = font.getsize(line)
+
+        # Draw outline
+        for dx, dy in outline_offsets:
+            draw.text((x_text + dx, y_text + dy), line, font=font, fill=outline_color)
+
+        # Draw text
+        draw.text((x_text, y_text), line, font=font, fill=text_color)
+        y_text += line_height
+
+    return image
+
+def visualize_tsv_entry(entry, save_dir):
+    """Render one TSV row as an annotated PNG inside ``save_dir``.
+
+    After stripping, the row must split into exactly eight tab-separated
+    fields: uniq_id, image_id, sent, box_string, pts_string, img_base64,
+    annot_base64 and pts_string_interpolated. Only uniq_id, sent,
+    pts_string and img_base64 are used here.
+
+    Args:
+        entry: One raw line of the TSV file.
+        save_dir: Output directory; created if it does not exist.
+
+    Returns:
+        None. On any failure a message is printed and nothing is saved.
+    """
+    columns = entry.strip().split('\t')
+    if len(columns) != 8:
+        print("Incorrect number of fields:", len(columns))
+        return
+    (uniq_id, _image_id, sent, _box_string,
+     pts_string, img_base64, _annot_base64, _pts_interpolated) = columns
+
+    # Decode the base64 image payload.
+    photo = decode_base64_image(img_base64)
+    if photo is None:
+        print("Failed to decode image.")
+        return
+
+    # Rasterize the polygon string at the photo's size.
+    region = generate_mask_from_pts_string(pts_string, photo.size)
+    if region is None:
+        print("Failed to generate mask from polygon.")
+        return
+
+    # Tint the region on top of the photo.
+    tinted = overlay_mask_on_image(photo, region)
+    if tinted is None:
+        print("Failed to overlay mask on image.")
+        return
+
+    # Write the referring expression onto the tinted image.
+    annotated = add_referring_text(tinted, sent)
+
+    # Save as <uniq_id>.png; matplotlib display is intentionally not used.
+    os.makedirs(save_dir, exist_ok=True)
+    filepath = os.path.join(save_dir, f"{uniq_id}.png")
+    annotated.save(filepath)
+
+    print(f"Saved image {filepath}")
+
+def main():
+    """Interactively render entries of the configured TSV file.
+
+    Loads every line of the TSV into memory, then repeatedly prompts
+    for an entry index, 'all' (render every entry, then exit) or 'q'
+    (exit). Rendered images are written to ``save_dir``.
+    """
+    # Update this path to point to your TSV file
+    tsv_file = 'datasets/finetune/aihub_indoor_bbox_fix/aihub_indoor_train.tsv'
+    # tsv_file = 'datasets/finetune/aihub_indoor_bbox_fix/aihub_indoor_val.tsv'
+
+    # Directory where the images will be saved
+    save_dir = 'visualizations_train'
+
+    # The referring text is drawn with a Korean font, so it presumably
+    # contains non-ASCII characters; decode explicitly instead of relying
+    # on the locale default. Assumes the TSV was written as UTF-8 -
+    # TODO confirm against the script that generates it.
+    with open(tsv_file, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    num_entries = len(lines)
+    print(f"Total entries: {num_entries}")
+
+    while True:
+        choice = input(f"Enter entry index (0 to {num_entries - 1}), 'all' to process all entries, or 'q' to quit: ")
+        command = choice.lower()
+        if command == 'q':
+            break
+        if command == 'all':
+            for entry in lines:
+                visualize_tsv_entry(entry, save_dir)
+            print(f"All images have been saved to {save_dir}")
+            break
+
+        # Only the integer parse is guarded, so a ValueError raised while
+        # rendering is not misreported as invalid user input.
+        try:
+            index = int(choice)
+        except ValueError:
+            print("Invalid input.")
+            continue
+
+        if 0 <= index < num_entries:
+            visualize_tsv_entry(lines[index], save_dir)
+        else:
+            print("Index out of range.")
+
+if __name__ == '__main__':
+    main()
diff --git a/models/__pycache__/__init__.cpython-37.pyc b/models/__pycache__/__init__.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/__init__.cpython-37.pyc b/models/polyformer/__pycache__/__init__.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/polyformer.cpython-37.pyc b/models/polyformer/__pycache__/polyformer.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/swin.cpython-37.pyc b/models/polyformer/__pycache__/swin.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/unify_multihead_attention.cpython-37.pyc b/models/polyformer/__pycache__/unify_multihead_attention.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/unify_transformer.cpython-37.pyc b/models/polyformer/__pycache__/unify_transformer.cpython-37.pyc
diff --git a/models/polyformer/__pycache__/unify_transformer_layer.cpython-37.pyc b/models/polyformer/__pycache__/unify_transformer_layer.cpython-37.pyc
diff --git a/polyformer_module/__pycache__/__init__.cpython-37.pyc b/polyformer_module/__pycache__/__init__.cpython-37.pyc
diff --git a/refer/refer.py b/refer/refer.py
@@ -78,15 +78,15 @@ def __init__(self, data_root, dataset='refcoco', splitBy='unc'):
 		# load refs from data/dataset/refs(dataset).json
 		tic = time.time()
 		if dataset in ['aihub_indoor', 'aihub_manufact']:
-			ref_file = osp.join(self.DATA_DIR, 'refs.p')
+			ref_file = osp.join(self.DATA_DIR, 'refs_2.p')
 		else:
 			ref_file = osp.join(self.DATA_DIR, 'refs('+splitBy+').p')
 		self.data = {}
 		self.data['dataset'] = dataset
 		self.data['refs'] = pickle.load(open(ref_file, 'rb'))
 
 		# load annotations from data/dataset/instances.json
-		instances_file = osp.join(self.DATA_DIR, 'instances.json')
+		instances_file = osp.join(self.DATA_DIR, 'instances_2.json')
 		instances = json.load(open(instances_file, 'r'))
 		self.data['images'] = instances['images']
 		self.data['annotations'] = instances['annotations']

diff --git a/run_scripts/evaluation/evaluate_polyformer_b_aihub_indoor.sh b/run_scripts/evaluation/evaluate_polyformer_b_aihub_indoor.sh
@@ -17,8 +17,10 @@ model='polyformer_b'
 num_bins=64
 batch_size=16
 
-dataset='aihub_indoor'
-ckpt_path=../finetune/polyformer_b_aihub_indoor_80_checkpoints/100_5e-5_512/checkpoint_epoch_21.pt
+# dataset='aihub_indoor'
+dataset='aihub_indoor_bbox_fix'
+ckpt_path=../finetune/polyformer_b_aihub_indoor_80_bbox_fix_checkpoints/100_5e-5_512/checkpoint_epoch_4.pt
+
 # dataset='refcocog'
 # ckpt_path=../../weights/polyformer_b_refcocog.pt